/* uxn-utils/cli/lz/lz_main.c */


#include <stdio.h>
#include <stdlib.h>

#include "lz.h"

/* Shortest back-reference worth encoding; stored lengths are biased by it. */
enum { MinMatchLength = 4 };

/*
 * Compress input_size bytes from input into output.
 *
 * The stream is a sequence of tokens:
 *   0nnnnnnn <n+1 bytes>        literal run of n+1 bytes (1..128)
 *   10nnnnnn <off>              copy n+4 bytes from off+1 bytes back
 *   11nnnnnn nnnnnnnn <off>     same, with a 14-bit length field
 *
 * Returns the number of bytes written to output, or -1 when output_size
 * is too small to hold the compressed stream.
 */
int
uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
{
	enum {
		WindowSize = 256,                     /* offset is a single byte */
		LongestMatch = 0x3FFF + MinMatchLength, /* 14-bit length field */
		ShortCtlMax = 0x3F,                   /* largest 1-byte length field */
		FullLiteralCtl = 127                  /* literal run of 128 bytes */
	};
	const unsigned char *src = input;
	const unsigned char *const src_end = src + input_size;
	const unsigned char *cursor = src;
	unsigned char *dst = output;
	unsigned char *literal_ctl = 0; /* control byte of the open literal run */
	int room = output_size;

	while(cursor != src_end) {
		const unsigned char *best_from = 0;
		int best_len = 0;
		int history = (int)(cursor - src);
		int lookahead = (int)(src_end - cursor);
		int distance;

		if(history > WindowSize)
			history = WindowSize;
		if(lookahead > LongestMatch)
			lookahead = LongestMatch;

		/* Try every distance, farthest first. A candidate that runs into the
		 * cursor wraps around, mirroring how the decoder's byte-by-byte copy
		 * repeats freshly written output. */
		for(distance = history; distance > 0; distance--) {
			const unsigned char *candidate = cursor - distance;
			int n = 0;
			while(n < lookahead && cursor[n] == candidate[n % distance])
				n++;
			if(n > best_len) {
				best_len = n;
				best_from = candidate;
				if(n == lookahead)
					break; /* Cannot do better than the whole lookahead */
			}
		}

		if(best_len >= MinMatchLength) {
			/* Back-reference token */
			int ctl = best_len - MinMatchLength;
			int needed = ctl > ShortCtlMax ? 3 : 2;
			if(room < needed)
				return -1;
			room -= needed;
			if(ctl > ShortCtlMax) {
				*dst++ = (unsigned char)(0x80 | 0x40 | (ctl >> 8));
				*dst++ = (unsigned char)ctl;
			} else {
				*dst++ = (unsigned char)(0x80 | ctl);
			}
			*dst++ = (unsigned char)(cursor - best_from - 1); /* 0 means 1 back */
			cursor += best_len;
			literal_ctl = 0; /* A following literal starts a fresh run */
			continue;
		}

		if(literal_ctl) {
			/* Extend the open literal run by one byte */
			if(room < 1)
				return -1;
			room -= 1;
			*literal_ctl += 1;
			if(*literal_ctl == FullLiteralCtl)
				literal_ctl = 0; /* Run is full; next literal opens a new one */
		} else {
			/* Open a new literal run: control byte 0 encodes length 1 */
			if(room < 2)
				return -1;
			room -= 2;
			literal_ctl = dst++;
			*literal_ctl = 0;
		}
		*dst++ = *cursor++;
	}
	return (int)(dst - (unsigned char *)output);
}

/*
 * Expand a compressed stream of input_size bytes into output.
 *
 * Token layout (control byte first):
 *   0nnnnnnn <n+1 bytes>        literal run of n+1 bytes
 *   10nnnnnn <off>              copy n+MinMatchLength bytes from off+1 back
 *   11nnnnnn nnnnnnnn <off>     same, with a 14-bit length field
 *
 * Returns the number of bytes written, or -1 when the input is truncated,
 * a back-reference points before the start of the output, input_size is
 * negative, or the result does not fit in output_size.
 */
int
uxn_lz_expand(void *output, int output_size, const void *input, int input_size)
{
	int num, offset, written = 0;
	unsigned char *out = output;
	const unsigned char *from, *in = input;
	if(input_size < 0)
		goto malformed;
	while(input_size > 0) {
		/* Every byte taken from `in` is charged against input_size before it
		 * is read, so the loop ends exactly at the end of the input instead
		 * of decoding whatever follows it. */
		num = *in++;
		input_size -= 1; /* Control byte */
		if(num > 127) /* Dictionary */
		{
			num &= 0x7F;
			if(num & 0x40) {
				if((input_size -= 1) < 0) /* Low byte of the length */
					goto malformed;
				num = *in++ | ((num << 8) & 0x3FFF);
			}
			num += MinMatchLength;
			if((input_size -= 1) < 0) /* Offset byte */
				goto malformed;
			offset = *in++ + 1;
			if(offset > written)
				goto malformed;
			from = out + written - offset;
		} else /* Literal */
		{
			num += 1; /* Control value n means n+1 literal bytes */
			if((input_size -= num) < 0)
				goto malformed;
			from = in, in += num;
		}
		if(num > output_size - written)
			goto overflow;
		/* Byte-by-byte on purpose: a match may overlap the bytes it writes. */
		while(num--)
			out[written++] = *from++;
	}
	return written;
overflow:
malformed:
	return -1;
}

/*
 * Incremental decoder: consumes bytes from a->next_in (a->avail_in available)
 * and produces bytes at a->next_out (a->avail_out free), keeping recent output
 * in a->dict so back-references can be resolved across calls.
 *
 * The function is a resumable state machine; a->state records where to pick
 * up again when input or output space ran out:
 *   0  at a token boundary
 *   1  waiting for the low byte of a two-byte length
 *   2  waiting for the offset byte of a dictionary token
 *   3  in the middle of a dictionary copy
 *   4  in the middle of a literal copy
 *   5  malformed stream seen; nothing further is decoded
 *
 * Returns 0, or -1 on the call that detects a back-reference reaching
 * further back than dict_len bytes.
 * NOTE(review): once state 5 is latched, later calls return 0 without
 * consuming anything — confirm callers stop after the first -1.
 * NOTE(review): dict positions are unsigned char, so a->dict is presumably a
 * 256-byte ring buffer — confirm against the struct declaration in lz.h.
 */
int
uxn_lz_expand_stream(struct uxn_lz_expand_t *a)
{
	/* Copy struct to stack variables for compiler optimizations */
	unsigned char *next_in = a->next_in, *next_out = a->next_out;
	int avail_in = a->avail_in, avail_out = a->avail_out;
	int dict_len = a->dict_len, copy_num = a->copy_num;
	unsigned char dict_read_pos = a->dict_read_pos,
				  dict_write_pos = a->dict_write_pos, *dict = a->dict;
	int result = 0, distance;
	switch(a->state) {
	case 0:
		for(; avail_in;) {
			copy_num = *next_in++;
			avail_in--;
			if(copy_num > 127) /* Dictionary */
			{
				copy_num &= 0x7F;
				if(copy_num & 0x40) {
				case 1:
					if(!avail_in) {
						a->state = 1;
						goto need_more;
					}
					avail_in--;
					copy_num = *next_in++ | ((copy_num << 8) & 0x3FFF);
				}
				copy_num += MinMatchLength;
			case 2:
				if(!avail_in) {
					a->state = 2;
					goto need_more;
				}
				avail_in--;
				/* Keep the distance in an int: it ranges over 1..256, and 256
				 * would truncate to 0 in an unsigned char and slip past the
				 * bounds check below. */
				distance = *next_in++ + 1;
				if(distance > dict_len) {
					a->state = 5;
					result = -1;
					goto flush;
				} /* Malformed */
				dict_read_pos = (unsigned char)(dict_write_pos - distance);
				if((dict_len += copy_num) > 256)
					dict_len = 256;
			case 3:
				do {
					if(!avail_out) {
						a->state = 3;
						goto need_more;
					}
					/* Positions wrap at 256 through unsigned char arithmetic */
					*next_out++ = dict[dict_write_pos++] = dict[dict_read_pos++];
					avail_out--;
				} while(--copy_num);
			} else /* Literal */
			{
				copy_num++;
				if((dict_len += copy_num) > 256)
					dict_len = 256;
			case 4:
				do {
					if(!avail_in || !avail_out) {
						a->state = 4;
						goto need_more;
					}
					*next_out++ = dict[dict_write_pos++] = *next_in++;
					avail_in--, avail_out--;
				} while(--copy_num);
			}
		}
		a->state = 0; /* Input exhausted exactly at a token boundary */
	case 5:;
	}
need_more:
flush:
	/* Flush stack variables back to struct */
	a->next_in = next_in, a->next_out = next_out;
	a->avail_in = avail_in, a->avail_out = avail_out;
	a->dict_len = dict_len, a->copy_num = copy_num;
	a->dict_read_pos = dict_read_pos, a->dict_write_pos = dict_write_pos;
	return result;
}

/*
 * Fold bytes_size bytes into a checksum that starts from seed.
 *
 * The upper half of seed feeds one multiplicative accumulator and the whole
 * seed feeds the other; each input byte is added to both as the 16-bit value
 * made of the byte repeated twice. The result packs the first accumulator
 * into the upper 16 bits and the low 16 bits of the second below it.
 */
unsigned int
uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size)
{
	const unsigned char *data = bytes;
	unsigned int hi = seed >> 16;
	unsigned int lo = seed;
	unsigned int k;
	for(k = 0; k < bytes_size; k++) {
		/* b * 0x101 == (b << 8) | b for any 8-bit b */
		unsigned int doubled = (unsigned int)data[k] * 0x101u;
		hi = hi * 0x2443 + doubled;
		lo = lo * 0x118d + doubled;
	}
	return hi << 16 | (lo & 0xFFFF);
}

/* cc lz_main.c -o main && ./main */

/*
 * Demo driver: reads up to BufferSize bytes of example.txt, compresses them
 * into compressed.bin, then expands the compressed bytes again and prints the
 * sizes of both steps. Returns EXIT_SUCCESS only when the round trip
 * reproduces the original length.
 */
int
main(int argc, char *argv[])
{
	enum { BufferSize = 1000000 };
	/* Heap buffers: three 1 MB arrays are too much to put on the stack. */
	unsigned char *raw = malloc(BufferSize);
	unsigned char *packed = malloc(BufferSize);
	unsigned char *unpacked = malloc(BufferSize);
	FILE *fp = NULL, *out_file = NULL;
	int raw_len = 0, packed_len, unpacked_len, c, close_rc;
	int status = EXIT_FAILURE;

	(void)argc;
	(void)argv;

	if(!raw || !packed || !unpacked) {
		fprintf(stderr, "out of memory\n");
		goto cleanup;
	}

	fp = fopen("example.txt", "rb");
	if(!fp) {
		perror("example.txt");
		goto cleanup;
	}
	/* Input longer than the buffer is truncated to BufferSize bytes. */
	while(raw_len < BufferSize && (c = getc(fp)) != EOF)
		raw[raw_len++] = (unsigned char)c;

	packed_len = uxn_lz_compress(packed, BufferSize, raw, raw_len);
	if(packed_len < 0) {
		/* Stop here: a negative length must never reach fwrite. */
		printf("ERROR\n");
		goto cleanup;
	}
	printf("!!!%d -> %d\n", raw_len, packed_len);

	out_file = fopen("compressed.bin", "wb");
	if(!out_file) {
		perror("compressed.bin");
		goto cleanup;
	}
	if(fwrite(packed, 1, (size_t)packed_len, out_file) != (size_t)packed_len) {
		perror("compressed.bin");
		goto cleanup;
	}
	close_rc = fclose(out_file);
	out_file = NULL;
	if(close_rc != 0) {
		perror("compressed.bin");
		goto cleanup;
	}

	/* Other way: expand the compressed bytes, not the original input. */
	unpacked_len = uxn_lz_expand(unpacked, BufferSize, packed, packed_len);
	printf("!!!%d -> %d\n", packed_len, unpacked_len);
	if(unpacked_len != raw_len) {
		printf("ERROR\n");
		goto cleanup;
	}

	status = EXIT_SUCCESS;
cleanup:
	if(out_file)
		fclose(out_file);
	if(fp)
		fclose(fp);
	free(unpacked);
	free(packed);
	free(raw);
	return status;
}