diff --git a/cli/lz/build.sh b/cli/lz/build.sh
index 9ba0a67..9a38f89 100755
--- a/cli/lz/build.sh
+++ b/cli/lz/build.sh
@@ -17,43 +17,29 @@ fi
 
 # Building
 
-cc lz_main.c -o main
-cc ulzdec.c -o ulzdec
-cc ulzenc.c -o ulzenc
+cc ulzdec.c $DEBUG_flags -o ulzdec
+cc ulzenc.c $DEBUG_flags -o ulzenc
 $ASM ulzdec.tal ulzdec.rom
 
 # Make compressed file
 
-./main && ./main # read example.txt, write compressed.bin
-
-# C Decoder
-
-echo ""
-echo "C Decoder"
-echo ""
-./ulzdec a.ulz b.bin && cat b.bin
-
-# Uxn Decoding
-
-echo ""
-echo "Uxn Decoder"
-echo ""
-uxncli ulzdec.rom a.ulz b.bin && cat b.bin
-
-# C Encoder
-
-echo ""
-echo "C Encoder"
-echo ""
 ./ulzenc example.txt a.ulz
-
-# Uxn Decoding
+./ulzdec a.ulz b.bin
+echo ""
+echo "c decoder:"
+echo ""
+cat b.bin
+echo ""
+echo ""
+echo "uxn decoder:"
+echo ""
 uxncli ulzdec.rom a.ulz b.bin && cat b.bin
-# ./ulzdec a.ulz b.bin && cat b.bin
+echo ""
+echo ""
 
-rm ./main
 rm ./ulzdec
 rm ./ulzenc
 rm ./a.ulz
 rm ./b.bin
 rm ./ulzdec.rom
+rm *.sym
diff --git a/cli/lz/ulzenc.c b/cli/lz/ulzenc.c
index 9e0a6fe..bc3c3bc 100644
--- a/cli/lz/ulzenc.c
+++ b/cli/lz/ulzenc.c
@@ -3,6 +3,8 @@
 
 /* cc ulzenc.c -o ulzenc && ./ulzenc example.txt */
 
+enum { MinMatchLength = 4 };
+
 static int
 error(const char *name, const char *msg)
 {
@@ -10,37 +12,106 @@ error(const char *name, const char *msg)
 	return 0;
 }
 
-char *ptr, *a, mem[0x10000];
+char *raw, *mem;
 
 int
-get_lit(int i)
+uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
 {
-	return 0;
+	int dict_len, match_len, i, string_len, match_ctl;
+	unsigned char *out = output, *combine = 0;
+	const unsigned char *dict, *dict_best = 0, *in = input, *start = in,
+						*end = in + input_size;
+	while(in != end) {
+		dict_len = (int)(in - start); /* Get available dictionary size (history of
+		                                 original output) */
+		if(dict_len > 256)
+			dict_len = 256; /* Limit history lookback to 256 bytes */
+		dict = in - dict_len; /* Start of dictionary */
+		string_len = (int)(end - in); /* Size of the string to search for */
+		if(string_len > 0x3FFF + MinMatchLength)
+			string_len = 0x3FFF + MinMatchLength;
+		/* ^ Limit string length to what we can fit in 14 bits, plus the minimum
+		 * match length */
+		match_len = 0; /* This will hold the length of our best match */
+		for(; dict_len;
+			dict += 1, dict_len -= 1) /* Iterate through the dictionary */
+		{
+			for(i = 0;; i++) /* Find common prefix length with the string */
+			{
+				if(i == string_len) {
+					match_len = i;
+					dict_best = dict;
+					goto done_search;
+				}
+				/* ^ If we reach the end of the string, this is the best possible match.
+				 * End. */
+				if(in[i] != dict[i % dict_len])
+					break; /* Dictionary repeats if we hit the end */
+			}
+			if(i > match_len) {
+				match_len = i;
+				dict_best = dict;
+			}
+		}
+	done_search:
+		if(match_len >= MinMatchLength) /* Long enough? Use dictionary match */
+		{
+			if((output_size -= 2) < 0)
+				goto overflow;
+			match_ctl =
+				match_len -
+				MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */
+			if(match_ctl >
+				0x3F) /* Match is long enough to use 2 bytes for the size */
+			{
+				if((output_size -= 1) < 0)
+					goto overflow;
+				*out++ = match_ctl >> 8 | 0x40 |
+					0x80; /* High byte of the size, with both flags set */
+				*out++ = match_ctl; /* Low byte of the size */
+			} else /* Use 1 byte for the size */
+			{
+				*out++ = match_ctl | 0x80; /* Set the "dictionary" flag */
+			}
+			*out++ = in - dict_best -
+				1; /* Write offset into history. (0 is -1, 1 is -2, ...) */
+			in += match_len; /* Advance input by size of the match */
+			combine = 0; /* Disable combining previous literal, if any */
+			continue;
+		}
+		if(combine) /* Combine with previous literal */
+		{
+			if((output_size -= 1) < 0)
+				goto overflow;
+			if(++*combine == 127)
+				combine = 0; /* If the literal hits its size limit, terminate it. */
+		} else /* Start a new literal */
+		{
+			if((output_size -= 2) < 0)
+				goto overflow;
+			combine = out++; /* Store this address, and later use it to increment the
+			                    literal size. */
+			*combine = 0; /* The 0 here means literal of length 1. */
+		}
+		*out++ = *in++; /* Write 1 literal byte from the input to the output. */
+	}
+	return (int)(out - (unsigned char *)output);
+overflow:
+	return -1;
 }
 
 int
 encode_ulz(FILE *src)
 {
-	int i, j, ptr = 0, length = 0;
-	a = malloc(0x10000);
-	/* load */
-	while(fread(a + length, 1, 1, src) && ++length) {}
-	/* encode */
-	for(i = 0; i < length; i++) {
-		/* try to make a CPY */
-
-		/* try to make a LIT */
-		for(j = i; j - i < 127 && j < length; j++) {}
-		if(i != j) {
-			int litlen = j - i;
-			/* LIT */
-			mem[ptr++] = litlen;
-			/* LIT(body) */
-			for(j = i; j - i < litlen + 1; j++) mem[ptr++] = a[j];
-		}
-		i += j - 1;
-	}
-	return ptr - 1;
+	int length;
+	mem = malloc(1000000), raw = malloc(0x10000);
+	if(!mem || !raw)
+		return -1;
+	/* raw holds 0x10000 bytes: read at most that, reject longer inputs */
+	length = (int)fread(raw, 1, 0x10000, src);
+	if(length == 0x10000 && fgetc(src) != EOF)
+		return -1;
+	return uxn_lz_compress(mem, 1000000, raw, length);
 }
 
 int
@@ -57,7 +128,9 @@ main(int argc, char *argv[])
 	res = encode_ulz(src);
+	if(res < 0)
+		return !error("Compression failed", argv[1]);
 	if(!(dst = fopen(argv[2], "wb")))
-		return !error("Invalid output file", argv[1]);
-	fwrite(&mem, res, 1, dst);
+		return !error("Invalid output file", argv[2]);
+	fwrite(mem, res, 1, dst);
 	printf("Compressed %s -> %s(%d bytes).\n", argv[1], argv[2], res);
 	return 0;
 }
\ No newline at end of file