Added compression examples
This commit is contained in:
parent
bd3b9a70b7
commit
7528540caf
|
@ -0,0 +1,180 @@
|
|||
#include "uxn_lz.h"
|
||||
|
||||
/* Shortest dictionary match worth encoding. Match lengths are stored biased by
 * this value (a stored 0 means a match of MinMatchLength bytes). */
enum { MinMatchLength = 4 };
|
||||
|
||||
int
|
||||
uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
|
||||
{
|
||||
int dict_len, match_len, i, string_len, match_ctl;
|
||||
unsigned char *out = output, *combine = 0;
|
||||
const unsigned char *dict, *dict_best = 0, *in = input, *start = in, *end = in + input_size;
|
||||
while (in != end)
|
||||
{
|
||||
dict_len = (int)(in - start); /* Get available dictionary size (history of original output) */
|
||||
if (dict_len > 256) dict_len = 256; /* Limit history lookback to 256 bytes */
|
||||
dict = in - dict_len; /* Start of dictionary */
|
||||
string_len = (int)(end - in); /* Size of the string to search for */
|
||||
if (string_len > 0x3FFF + MinMatchLength) string_len = 0x3FFF + MinMatchLength;
|
||||
/* ^ Limit string length to what we can fit in 14 bits, plus the minimum match length */
|
||||
match_len = 0; /* This will hold the length of our best match */
|
||||
for (; dict_len; dict += 1, dict_len -= 1) /* Iterate through the dictionary */
|
||||
{
|
||||
for (i = 0;; i++) /* Find common prefix length with the string */
|
||||
{
|
||||
if (i == string_len) { match_len = i; dict_best = dict; goto done_search; }
|
||||
/* ^ If we reach the end of the string, this is the best possible match. End. */
|
||||
if (in[i] != dict[i % dict_len]) break; /* Dictionary repeats if we hit the end */
|
||||
}
|
||||
if (i > match_len) { match_len = i; dict_best = dict; }
|
||||
}
|
||||
done_search:
|
||||
if (match_len >= MinMatchLength) /* Long enough? Use dictionary match */
|
||||
{
|
||||
if ((output_size -= 2) < 0) goto overflow;
|
||||
match_ctl = match_len - MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */
|
||||
if (match_ctl > 0x3F) /* Match is long enough to use 2 bytes for the size */
|
||||
{
|
||||
if ((output_size -= 1) < 0) goto overflow;
|
||||
*out++ = match_ctl >> 8 | 0x40 | 0x80; /* High byte of the size, with both flags set */
|
||||
*out++ = match_ctl; /* Low byte of the size */
|
||||
}
|
||||
else /* Use 1 byte for the size */
|
||||
{
|
||||
*out++ = match_ctl | 0x80; /* Set the "dictionary" flag */
|
||||
}
|
||||
*out++ = in - dict_best - 1; /* Write offset into history. (0 is -1, 1 is -2, ...) */
|
||||
in += match_len; /* Advance input by size of the match */
|
||||
combine = 0; /* Disable combining previous literal, if any */
|
||||
continue;
|
||||
}
|
||||
if (combine) /* Combine with previous literal */
|
||||
{
|
||||
if ((output_size -= 1) < 0) goto overflow;
|
||||
if (++*combine == 127) combine = 0; /* If the literal hits its size limit, terminate it. */
|
||||
}
|
||||
else /* Start a new literal */
|
||||
{
|
||||
if ((output_size -= 2) < 0) goto overflow;
|
||||
combine = out++; /* Store this address, and later use it to increment the literal size. */
|
||||
*combine = 0; /* The 0 here means literal of length 1. */
|
||||
}
|
||||
*out++ = *in++; /* Write 1 literal byte from the input to the output. */
|
||||
}
|
||||
return (int)(out - (unsigned char *)output);
|
||||
overflow: return -1;
|
||||
}
|
||||
|
||||
int
|
||||
uxn_lz_expand(void *output, int output_size, const void *input, int input_size)
|
||||
{
|
||||
int num, offset, written = 0;
|
||||
unsigned char *out = output;
|
||||
const unsigned char *from, *in = input;
|
||||
while (input_size)
|
||||
{
|
||||
num = *in++;
|
||||
if (num > 127) /* Dictionary */
|
||||
{
|
||||
if ((input_size -= 1) < 0) goto malformed;
|
||||
num &= 0x7F;
|
||||
if (num & 0x40)
|
||||
{
|
||||
if ((input_size -= 1) < 0) goto malformed;
|
||||
num = *in++ | num << 8 & 0x3FFF;
|
||||
}
|
||||
num += MinMatchLength;
|
||||
offset = *in++ + 1;
|
||||
if (offset > written) goto malformed;
|
||||
from = out + written - offset;
|
||||
}
|
||||
else /* Literal */
|
||||
{
|
||||
input_size -= ++num;
|
||||
if (input_size < 0) goto malformed;
|
||||
from = in, in += num;
|
||||
}
|
||||
if (written + num > output_size) goto overflow;
|
||||
while (num--) out[written++] = *from++;
|
||||
}
|
||||
return written;
|
||||
overflow: malformed: return -1;
|
||||
}
|
||||
|
||||
int
|
||||
uxn_lz_expand_stream(struct uxn_lz_expand_t *a)
|
||||
{
|
||||
/* Copy struct to stack variables for compiler optimizations */
|
||||
unsigned char *next_in = a->next_in, *next_out = a->next_out;
|
||||
int avail_in = a->avail_in, avail_out = a->avail_out;
|
||||
int dict_len = a->dict_len, copy_num = a->copy_num;
|
||||
unsigned char dict_read_pos = a->dict_read_pos, dict_write_pos = a->dict_write_pos, *dict = a->dict;
|
||||
int result = 0;
|
||||
switch (a->state)
|
||||
{
|
||||
case 0:
|
||||
for (; avail_in;)
|
||||
{
|
||||
copy_num = *next_in++;
|
||||
avail_in--;
|
||||
if (copy_num > 127) /* Dictionary */
|
||||
{
|
||||
copy_num &= 0x7F;
|
||||
if (copy_num & 0x40)
|
||||
{
|
||||
case 1:
|
||||
if (!avail_in) { a->state = 1; goto need_more; }
|
||||
avail_in--;
|
||||
copy_num = *next_in++ | copy_num << 8 & 0x3FFF;
|
||||
}
|
||||
copy_num += MinMatchLength;
|
||||
case 2:
|
||||
if (!avail_in) { a->state = 2; goto need_more; }
|
||||
avail_in--;
|
||||
dict_read_pos = *next_in++ + 1;
|
||||
if (dict_read_pos > dict_len) { a->state = 5; result = -1; goto flush; } /* Malformed */
|
||||
dict_read_pos = dict_write_pos - dict_read_pos;
|
||||
if ((dict_len += copy_num) > 256) dict_len = 256;
|
||||
case 3:
|
||||
do {
|
||||
if (!avail_out) { a->state = 3; goto need_more; }
|
||||
*next_out++ = dict[dict_write_pos++] = dict[dict_read_pos++];
|
||||
avail_out--;
|
||||
} while (--copy_num);
|
||||
}
|
||||
else /* Literal */
|
||||
{
|
||||
copy_num++;
|
||||
if ((dict_len += copy_num) > 256) dict_len = 256;
|
||||
case 4:
|
||||
do {
|
||||
if (!avail_in || !avail_out) { a->state = 4; goto need_more; }
|
||||
*next_out++ = dict[dict_write_pos++] = *next_in++;
|
||||
avail_in--, avail_out--;
|
||||
} while (--copy_num);
|
||||
}
|
||||
}
|
||||
a->state = 0;
|
||||
case 5:;
|
||||
}
|
||||
need_more: flush:
|
||||
/* Flush stack variables back to struct */
|
||||
a->next_in = next_in, a->next_out = next_out;
|
||||
a->avail_in = avail_in, a->avail_out = avail_out;
|
||||
a->dict_len = dict_len, a->copy_num = copy_num;
|
||||
a->dict_read_pos = dict_read_pos, a->dict_write_pos = dict_write_pos;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Folds bytes_size bytes into a checksum that starts from seed.
 * The upper and lower halves of the seed drive two separate multiply-and-add
 * accumulators; each input byte is fed to both as the value (byte << 8 | byte).
 * The result packs the first accumulator into the upper 16 bits and the low
 * 16 bits of the second accumulator into the lower 16 bits. */
unsigned int
uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size)
{
	const unsigned char *data = bytes;
	unsigned int upper = seed >> 16;
	unsigned int lower = seed;
	unsigned int pos;
	for (pos = 0; pos < bytes_size; pos++)
	{
		/* Same byte in both halves of a 16-bit word */
		unsigned int doubled = (unsigned int)data[pos] * 0x101u;
		upper = upper * 0x2443 + doubled;
		lower = lower * 0x118d + doubled;
	}
	return (upper << 16) | (lower & 0xFFFF);
}
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
/* uxn_lz_compress and uxn_lz_expand return -1 if there wasn't enough space
 * in output. uxn_lz_expand can also return -1 if the input data was malformed.
 * On success, both return the number of bytes written to output. */
|
||||
|
||||
/* Compresses input_size bytes of input into output, which has room for
 * output_size bytes. Returns the compressed size, or -1 if it does not fit. */
int uxn_lz_compress(void *output, int output_size, const void *input, int input_size);
|
||||
/* Expands input_size bytes of compressed input into output, which has room
 * for output_size bytes. Returns the expanded size, or -1 if it does not fit
 * or the input is malformed. */
int uxn_lz_expand(void *output, int output_size, const void *input, int input_size);
|
||||
|
||||
/* State carried between calls to uxn_lz_expand_stream.
 * The caller supplies next_in/avail_in and next_out/avail_out; the remaining
 * fields are the expander's own bookkeeping.
 * NOTE(review): presumably the bookkeeping fields must be zeroed before the
 * first call (state 0 is the "between commands" state) - confirm with callers. */
struct uxn_lz_expand_t {
	unsigned char *next_in;  /* Next compressed byte to read */
	unsigned char *next_out; /* Where the next expanded byte is written */
	int avail_in;            /* Bytes readable at next_in */
	int avail_out;           /* Bytes writable at next_out */

	int dict_len;                 /* Bytes of history held in dict, capped at 256 */
	int copy_num;                 /* Bytes still to copy for the command in progress */
	int state;                    /* Resume point inside uxn_lz_expand_stream */
	unsigned char dict_read_pos;  /* Read index into dict; wraps at 256 */
	unsigned char dict_write_pos; /* Write index into dict; wraps at 256 */
	unsigned char dict[256];      /* Ring buffer of the most recent output bytes */
};
|
||||
|
||||
/* Expands as much as the input and output space described by *a allow, then
 * returns so the caller can refill input or drain output and call again.
 * Returns 0 normally and -1 if a dictionary offset in the input is invalid. */
int uxn_lz_expand_stream(struct uxn_lz_expand_t *a);
|
||||
|
||||
/* Seed value for uxn_checksum; presumably the seed to pass for the first
 * chunk of data - TODO confirm against callers. */
#define UXN_CHECKSUM_SEED 0x1234ABCD
|
||||
|
||||
/* Returns the checksum of bytes_size bytes at bytes, starting from seed.
 * With bytes_size == 0 the seed's upper half and low 16 bits are returned
 * recombined. */
unsigned int uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size);
|
||||
|
|
@ -0,0 +1,181 @@
|
|||
/* LZSS encoder-decoder (Haruhiko Okumura; public domain) */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/* Format parameters */
#define EI 11             /* bits used for a match position; typically 10..13 */
#define EJ 4              /* bits used for a match length; typically 4..5 */
#define P 1               /* matches of length <= P are emitted as one character */
#define N (1 << EI)       /* buffer size */
#define F ((1 << EJ) + 1) /* lookahead buffer size */

/* Bit-level output state: bits are collected into bit_buffer from the most
 * significant bit down; bit_mask marks the next bit to fill. */
int bit_buffer = 0;
int bit_mask = 128;

/* Running totals reported by encode(): bytes written and bytes read. */
unsigned long codecount = 0;
unsigned long textcount = 0;

/* Working window shared by encode() and decode(). */
unsigned char buffer[N * 2];

/* Input and output streams, opened by main(). */
FILE *infile;
FILE *outfile;
|
||||
|
||||
/* Reports a failed write on standard output and terminates with status 1. */
void error(void)
{
	fputs("Output error\n", stdout);
	exit(1);
}
|
||||
|
||||
void putbit1(void)
|
||||
{
|
||||
bit_buffer |= bit_mask;
|
||||
if ((bit_mask >>= 1) == 0) {
|
||||
if (fputc(bit_buffer, outfile) == EOF) error();
|
||||
bit_buffer = 0; bit_mask = 128; codecount++;
|
||||
}
|
||||
}
|
||||
|
||||
void putbit0(void)
|
||||
{
|
||||
if ((bit_mask >>= 1) == 0) {
|
||||
if (fputc(bit_buffer, outfile) == EOF) error();
|
||||
bit_buffer = 0; bit_mask = 128; codecount++;
|
||||
}
|
||||
}
|
||||
|
||||
void flush_bit_buffer(void)
|
||||
{
|
||||
if (bit_mask != 128) {
|
||||
if (fputc(bit_buffer, outfile) == EOF) error();
|
||||
codecount++;
|
||||
}
|
||||
}
|
||||
|
||||
/* Emits a literal: a 1 flag bit followed by the 8 bits of c, most
 * significant bit first. */
void output1(int c)
{
	int bit;

	putbit1();
	for (bit = 128; bit != 0; bit >>= 1) {
		if (c & bit)
			putbit1();
		else
			putbit0();
	}
}
|
||||
|
||||
void output2(int x, int y)
|
||||
{
|
||||
int mask;
|
||||
|
||||
putbit0();
|
||||
mask = N;
|
||||
while (mask >>= 1) {
|
||||
if (x & mask) putbit1();
|
||||
else putbit0();
|
||||
}
|
||||
mask = (1 << EJ);
|
||||
while (mask >>= 1) {
|
||||
if (y & mask) putbit1();
|
||||
else putbit0();
|
||||
}
|
||||
}
|
||||
|
||||
void encode(void)
|
||||
{
|
||||
int i, j, f1, x, y, r, s, bufferend, c;
|
||||
|
||||
for (i = 0; i < N - F; i++) buffer[i] = ' ';
|
||||
for (i = N - F; i < N * 2; i++) {
|
||||
if ((c = fgetc(infile)) == EOF) break;
|
||||
buffer[i] = c; textcount++;
|
||||
}
|
||||
bufferend = i; r = N - F; s = 0;
|
||||
while (r < bufferend) {
|
||||
f1 = (F <= bufferend - r) ? F : bufferend - r;
|
||||
x = 0; y = 1; c = buffer[r];
|
||||
for (i = r - 1; i >= s; i--)
|
||||
if (buffer[i] == c) {
|
||||
for (j = 1; j < f1; j++)
|
||||
if (buffer[i + j] != buffer[r + j]) break;
|
||||
if (j > y) {
|
||||
x = i; y = j;
|
||||
}
|
||||
}
|
||||
if (y <= P) { y = 1; output1(c); }
|
||||
else output2(x & (N - 1), y - 2);
|
||||
r += y; s += y;
|
||||
if (r >= N * 2 - F) {
|
||||
for (i = 0; i < N; i++) buffer[i] = buffer[i + N];
|
||||
bufferend -= N; r -= N; s -= N;
|
||||
while (bufferend < N * 2) {
|
||||
if ((c = fgetc(infile)) == EOF) break;
|
||||
buffer[bufferend++] = c; textcount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
flush_bit_buffer();
|
||||
printf("text: %ld bytes\n", textcount);
|
||||
printf("code: %ld bytes (%ld%%)\n",
|
||||
codecount, (codecount * 100) / textcount);
|
||||
}
|
||||
|
||||
int getbit(int n) /* get n bits */
|
||||
{
|
||||
int i, x;
|
||||
static int buf, mask = 0;
|
||||
|
||||
x = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
if (mask == 0) {
|
||||
if ((buf = fgetc(infile)) == EOF) return EOF;
|
||||
mask = 128;
|
||||
}
|
||||
x <<= 1;
|
||||
if (buf & mask) x++;
|
||||
mask >>= 1;
|
||||
}
|
||||
return x;
|
||||
}
|
||||
|
||||
void decode(void)
|
||||
{
|
||||
int i, j, k, r, c;
|
||||
|
||||
for (i = 0; i < N - F; i++) buffer[i] = ' ';
|
||||
r = N - F;
|
||||
while ((c = getbit(1)) != EOF) {
|
||||
if (c) {
|
||||
if ((c = getbit(8)) == EOF) break;
|
||||
fputc(c, outfile);
|
||||
buffer[r++] = c; r &= (N - 1);
|
||||
} else {
|
||||
if ((i = getbit(EI)) == EOF) break;
|
||||
if ((j = getbit(EJ)) == EOF) break;
|
||||
for (k = 0; k <= j + 1; k++) {
|
||||
c = buffer[(i + k) & (N - 1)];
|
||||
fputc(c, outfile);
|
||||
buffer[r++] = c; r &= (N - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int enc;
|
||||
char *s;
|
||||
|
||||
if (argc != 4) {
|
||||
printf("Usage: lzss e/d infile outfile\n\te = encode\td = decode\n");
|
||||
return 1;
|
||||
}
|
||||
s = argv[1];
|
||||
if (s[1] == 0 && (*s == 'd' || *s == 'D' || *s == 'e' || *s == 'E'))
|
||||
enc = (*s == 'e' || *s == 'E');
|
||||
else {
|
||||
printf("? %s\n", s); return 1;
|
||||
}
|
||||
if ((infile = fopen(argv[2], "rb")) == NULL) {
|
||||
printf("? %s\n", argv[2]); return 1;
|
||||
}
|
||||
if ((outfile = fopen(argv[3], "wb")) == NULL) {
|
||||
printf("? %s\n", argv[3]); return 1;
|
||||
}
|
||||
if (enc) encode(); else decode();
|
||||
fclose(infile); fclose(outfile);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue