diff --git a/cli/lz/lz.c b/cli/lz/lz.c
new file mode 100644
index 0000000..8a99844
--- /dev/null
+++ b/cli/lz/lz.c
@@ -0,0 +1,198 @@
+#include "uxn_lz.h"
+
+enum { MinMatchLength = 4 };
+
+/* Compress input_size bytes of input into output.
+ * The stream is a sequence of commands:
+ *   literal:    control byte 0..127 (run length - 1), then the raw bytes;
+ *   dictionary: control byte with 0x80 set (plus 0x40 and a second length
+ *               byte for long matches), then one byte holding offset - 1
+ *               into the previous 256 bytes of output.
+ * Returns the number of bytes written, or -1 if output_size is too small. */
+int
+uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
+{
+	int dict_len, match_len, i, string_len, match_ctl;
+	unsigned char *out = output, *combine = 0;
+	const unsigned char *dict, *dict_best = 0, *in = input, *start = in, *end = in + input_size;
+	while (in != end)
+	{
+		dict_len = (int)(in - start); /* Get available dictionary size (history of original output) */
+		if (dict_len > 256) dict_len = 256; /* Limit history lookback to 256 bytes */
+		dict = in - dict_len; /* Start of dictionary */
+		string_len = (int)(end - in); /* Size of the string to search for */
+		if (string_len > 0x3FFF + MinMatchLength) string_len = 0x3FFF + MinMatchLength;
+		/* ^ Limit string length to what we can fit in 14 bits, plus the minimum match length */
+		match_len = 0; /* This will hold the length of our best match */
+		for (; dict_len; dict += 1, dict_len -= 1) /* Iterate through the dictionary */
+		{
+			for (i = 0;; i++) /* Find common prefix length with the string */
+			{
+				if (i == string_len) { match_len = i; dict_best = dict; goto done_search; }
+				/* ^ If we reach the end of the string, this is the best possible match. End. */
+				if (in[i] != dict[i % dict_len]) break; /* Dictionary repeats if we hit the end */
+			}
+			if (i > match_len) { match_len = i; dict_best = dict; }
+		}
+done_search:
+		if (match_len >= MinMatchLength) /* Long enough? Use dictionary match */
+		{
+			if ((output_size -= 2) < 0) goto overflow;
+			match_ctl = match_len - MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */
+			if (match_ctl > 0x3F) /* Match is long enough to use 2 bytes for the size */
+			{
+				if ((output_size -= 1) < 0) goto overflow;
+				*out++ = match_ctl >> 8 | 0x40 | 0x80; /* High byte of the size, with both flags set */
+				*out++ = match_ctl; /* Low byte of the size */
+			}
+			else /* Use 1 byte for the size */
+			{
+				*out++ = match_ctl | 0x80; /* Set the "dictionary" flag */
+			}
+			*out++ = in - dict_best - 1; /* Write offset into history. (0 is -1, 1 is -2, ...) */
+			in += match_len; /* Advance input by size of the match */
+			combine = 0; /* Disable combining previous literal, if any */
+			continue;
+		}
+		if (combine) /* Combine with previous literal */
+		{
+			if ((output_size -= 1) < 0) goto overflow;
+			if (++*combine == 127) combine = 0; /* If the literal hits its size limit, terminate it. */
+		}
+		else /* Start a new literal */
+		{
+			if ((output_size -= 2) < 0) goto overflow;
+			combine = out++; /* Store this address, and later use it to increment the literal size. */
+			*combine = 0; /* The 0 here means literal of length 1. */
+		}
+		*out++ = *in++; /* Write 1 literal byte from the input to the output. */
+	}
+	return (int)(out - (unsigned char *)output);
+overflow: return -1;
+}
+
+/* Expand a complete compressed buffer (as produced by uxn_lz_compress).
+ * Returns the number of bytes written to output, or -1 if the input is
+ * malformed/truncated or the result does not fit in output_size. */
+int
+uxn_lz_expand(void *output, int output_size, const void *input, int input_size)
+{
+	int num, offset, written = 0;
+	unsigned char *out = output;
+	const unsigned char *from, *in = input;
+	while (input_size > 0)
+	{
+		num = *in++;
+		if (num > 127) /* Dictionary */
+		{
+			/* Every match consumes the control byte and the offset byte */
+			if ((input_size -= 2) < 0) goto malformed;
+			num &= 0x7F;
+			if (num & 0x40)
+			{
+				if ((input_size -= 1) < 0) goto malformed; /* Second length byte */
+				num = *in++ | num << 8 & 0x3FFF;
+			}
+			num += MinMatchLength;
+			offset = *in++ + 1;
+			if (offset > written) goto malformed;
+			from = out + written - offset;
+		}
+		else /* Literal */
+		{
+			input_size -= ++num + 1; /* Control byte plus the literal bytes */
+			if (input_size < 0) goto malformed;
+			from = in, in += num;
+		}
+		if (written + num > output_size) goto overflow;
+		while (num--) out[written++] = *from++; /* Byte-wise so overlapping matches repeat */
+	}
+	return written;
+overflow: malformed: return -1;
+}
+
+/* Incremental expander: consumes a->next_in/avail_in and fills
+ * a->next_out/avail_out, saving its position in *a whenever either side
+ * runs dry so the caller can refill and call again.
+ * Returns 0 normally, or -1 when a match offset reaches before the start of
+ * the history seen so far (malformed input). */
+int
+uxn_lz_expand_stream(struct uxn_lz_expand_t *a)
+{
+	/* Copy struct to stack variables for compiler optimizations */
+	unsigned char *next_in = a->next_in, *next_out = a->next_out;
+	int avail_in = a->avail_in, avail_out = a->avail_out;
+	int dict_len = a->dict_len, copy_num = a->copy_num;
+	unsigned char dict_read_pos = a->dict_read_pos, dict_write_pos = a->dict_write_pos, *dict = a->dict;
+	int result = 0, offset;
+	switch (a->state)
+	{
+case 0:
+		for (; avail_in;)
+		{
+			copy_num = *next_in++;
+			avail_in--;
+			if (copy_num > 127) /* Dictionary */
+			{
+				copy_num &= 0x7F;
+				if (copy_num & 0x40)
+				{
+case 1:
+					if (!avail_in) { a->state = 1; goto need_more; }
+					avail_in--;
+					copy_num = *next_in++ | copy_num << 8 & 0x3FFF;
+				}
+				copy_num += MinMatchLength;
+case 2:
+				if (!avail_in) { a->state = 2; goto need_more; }
+				avail_in--;
+				offset = *next_in++ + 1; /* 1..256; kept in an int so 256 does not wrap to 0 */
+				if (offset > dict_len) { a->state = 5; result = -1; goto flush; } /* Malformed */
+				dict_read_pos = dict_write_pos - offset; /* Wraps modulo 256 within the ring buffer */
+				if ((dict_len += copy_num) > 256) dict_len = 256;
+case 3:
+				do {
+					if (!avail_out) { a->state = 3; goto need_more; }
+					*next_out++ = dict[dict_write_pos++] = dict[dict_read_pos++];
+					avail_out--;
+				} while (--copy_num);
+			}
+			else /* Literal */
+			{
+				copy_num++;
+				if ((dict_len += copy_num) > 256) dict_len = 256;
+case 4:
+				do {
+					if (!avail_in || !avail_out) { a->state = 4; goto need_more; }
+					*next_out++ = dict[dict_write_pos++] = *next_in++;
+					avail_in--, avail_out--;
+				} while (--copy_num);
+			}
+		}
+		a->state = 0;
+case 5:;
+	}
+need_more: flush:
+	/* Flush stack variables back to struct */
+	a->next_in = next_in, a->next_out = next_out;
+	a->avail_in = avail_in, a->avail_out = avail_out;
+	a->dict_len = dict_len, a->copy_num = copy_num;
+	a->dict_read_pos = dict_read_pos, a->dict_write_pos = dict_write_pos;
+	return result;
+}
+
+/* Fold bytes_size bytes into a checksum. Two running accumulators are seeded
+ * from seed; the result packs the low 16 bits of each into one value. */
+unsigned int
+uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size)
+{
+	unsigned int x = seed >> 16, y = seed, c;
+	unsigned char *in = bytes, *end = in + bytes_size;
+	for (; in != end; in++) {
+		c = *in << 8 | *in;
+		x = x * 0x2443 + c;
+		y = y * 0x118d + c;
+	}
+	return x << 16 | (y & 0xFFFF);
+}
+
diff --git a/cli/lz/lz.h b/cli/lz/lz.h
new file mode 100644
index 0000000..3ab6ae9
--- /dev/null
+++ b/cli/lz/lz.h
@@ -0,0 +1,21 @@
+/* uxn_lz_compress and uxn_lz_expand return -1 if there wasn't enough space
+ * in output. uxn_lz_expand can also return -1 if the input data was malformed.
+ * Both return the number of bytes written to output on success. */
+
+int uxn_lz_compress(void *output, int output_size, const void *input, int input_size);
+int uxn_lz_expand(void *output, int output_size, const void *input, int input_size);
+
+struct uxn_lz_expand_t {
+	unsigned char *next_in, *next_out; /* Read and write cursors, advanced by each call */
+	int avail_in, avail_out; /* Bytes readable at next_in / writable at next_out */
+
+	int dict_len, copy_num, state; /* Resume state; NOTE(review): presumably zeroed before the first call */
+	unsigned char dict_read_pos, dict_write_pos, dict[256]; /* 256-byte history ring buffer */
+};
+
+int uxn_lz_expand_stream(struct uxn_lz_expand_t *a);
+
+#define UXN_CHECKSUM_SEED 0x1234ABCD
+
+unsigned int uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size);
+
diff --git a/cli/lz/lzss.c b/cli/lz/lzss.c
new file mode 100644
index 0000000..247a536
--- /dev/null
+++ b/cli/lz/lzss.c
@@ -0,0 +1,192 @@
+/* LZSS encoder-decoder (Haruhiko Okumura; public domain) */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define EI 11 /* typically 10..13 */
+#define EJ 4 /* typically 4..5 */
+#define P 1 /* If match length <= P then output one character */
+#define N (1 << EI) /* buffer size */
+#define F ((1 << EJ) + 1) /* lookahead buffer size */
+
+int bit_buffer = 0, bit_mask = 128;
+unsigned long codecount = 0, textcount = 0;
+unsigned char buffer[N * 2];
+FILE *infile, *outfile;
+
+/* Report a failed write to outfile and exit with status 1. */
+void error(void)
+{
+    printf("Output error\n"); exit(1);
+}
+
+/* Append a 1 bit; write the byte to outfile once 8 bits are collected. */
+void putbit1(void)
+{
+    bit_buffer |= bit_mask;
+    if ((bit_mask >>= 1) == 0) {
+        if (fputc(bit_buffer, outfile) == EOF) error();
+        bit_buffer = 0; bit_mask = 128; codecount++;
+    }
+}
+
+/* Append a 0 bit; write the byte to outfile once 8 bits are collected. */
+void putbit0(void)
+{
+    if ((bit_mask >>= 1) == 0) {
+        if (fputc(bit_buffer, outfile) == EOF) error();
+        bit_buffer = 0; bit_mask = 128; codecount++;
+    }
+}
+
+/* Write out a partially filled final byte, if there is one. */
+void flush_bit_buffer(void)
+{
+    if (bit_mask != 128) {
+        if (fputc(bit_buffer, outfile) == EOF) error();
+        codecount++;
+    }
+}
+
+/* Emit a literal: flag bit 1 followed by the 8 bits of c, MSB first. */
+void output1(int c)
+{
+    int mask;
+
+    putbit1();
+    mask = 256;
+    while (mask >>= 1) {
+        if (c & mask) putbit1();
+        else putbit0();
+    }
+}
+
+/* Emit a match: flag bit 0, then EI bits of x and EJ bits of y, MSB first. */
+void output2(int x, int y)
+{
+    int mask;
+
+    putbit0();
+    mask = N;
+    while (mask >>= 1) {
+        if (x & mask) putbit1();
+        else putbit0();
+    }
+    mask = (1 << EJ);
+    while (mask >>= 1) {
+        if (y & mask) putbit1();
+        else putbit0();
+    }
+}
+
+/* Compress infile to outfile, then print the input and output byte counts. */
+void encode(void)
+{
+    int i, j, f1, x, y, r, s, bufferend, c;
+
+    for (i = 0; i < N - F; i++) buffer[i] = ' ';
+    for (i = N - F; i < N * 2; i++) {
+        if ((c = fgetc(infile)) == EOF) break;
+        buffer[i] = c; textcount++;
+    }
+    bufferend = i; r = N - F; s = 0;
+    while (r < bufferend) {
+        f1 = (F <= bufferend - r) ? F : bufferend - r;
+        x = 0; y = 1; c = buffer[r];
+        for (i = r - 1; i >= s; i--)
+            if (buffer[i] == c) {
+                for (j = 1; j < f1; j++)
+                    if (buffer[i + j] != buffer[r + j]) break;
+                if (j > y) {
+                    x = i; y = j;
+                }
+            }
+        if (y <= P) { y = 1; output1(c); }
+        else output2(x & (N - 1), y - 2);
+        r += y; s += y;
+        if (r >= N * 2 - F) {
+            for (i = 0; i < N; i++) buffer[i] = buffer[i + N];
+            bufferend -= N; r -= N; s -= N;
+            while (bufferend < N * 2) {
+                if ((c = fgetc(infile)) == EOF) break;
+                buffer[bufferend++] = c; textcount++;
+            }
+        }
+    }
+    flush_bit_buffer();
+    /* Counters are unsigned long (%lu); an empty input must not divide by zero. */
+    printf("text: %lu bytes\n", textcount);
+    printf("code: %lu bytes (%lu%%)\n",
+        codecount, textcount ? (codecount * 100) / textcount : 0UL);
+}
+
+int getbit(int n) /* get n bits */
+{
+    int i, x;
+    static int buf, mask = 0;
+
+    x = 0;
+    for (i = 0; i < n; i++) {
+        if (mask == 0) {
+            if ((buf = fgetc(infile)) == EOF) return EOF;
+            mask = 128;
+        }
+        x <<= 1;
+        if (buf & mask) x++;
+        mask >>= 1;
+    }
+    return x;
+}
+
+/* Expand infile to outfile, replaying literals and matches through buffer. */
+void decode(void)
+{
+    int i, j, k, r, c;
+
+    for (i = 0; i < N - F; i++) buffer[i] = ' ';
+    r = N - F;
+    while ((c = getbit(1)) != EOF) {
+        if (c) {
+            if ((c = getbit(8)) == EOF) break;
+            fputc(c, outfile);
+            buffer[r++] = c; r &= (N - 1);
+        } else {
+            if ((i = getbit(EI)) == EOF) break;
+            if ((j = getbit(EJ)) == EOF) break;
+            for (k = 0; k <= j + 1; k++) {
+                c = buffer[(i + k) & (N - 1)];
+                fputc(c, outfile);
+                buffer[r++] = c; r &= (N - 1);
+            }
+        }
+    }
+}
+
+/* Command-line entry point: lzss e|d infile outfile. */
+int main(int argc, char *argv[])
+{
+    int enc;
+    char *s;
+
+    if (argc != 4) {
+        printf("Usage: lzss e/d infile outfile\n\te = encode\td = decode\n");
+        return 1;
+    }
+    s = argv[1];
+    if ((*s == 'd' || *s == 'D' || *s == 'e' || *s == 'E') && s[1] == 0)
+        enc = (*s == 'e' || *s == 'E');
+    else {
+        printf("? %s\n", s); return 1;
+    }
+    if ((infile = fopen(argv[2], "rb")) == NULL) {
+        printf("? %s\n", argv[2]); return 1;
+    }
+    if ((outfile = fopen(argv[3], "wb")) == NULL) {
+        printf("? %s\n", argv[3]); fclose(infile); return 1;
+    }
+    if (enc) encode(); else decode();
+    fclose(infile);
+    if (fclose(outfile) == EOF) error();
+    return 0;
+}
+