uxn-utils/cli/lz/ulzenc.c

#include <stdio.h>
#include <stdlib.h>

/* cc ulzenc.c -o ulzenc && ./ulzenc a.bin b.ulz */

/*
 * Write a diagnostic of the form "<name>: <msg>" plus a newline to stderr.
 * The result is always 0, so a caller can `return error(...)` for a zero
 * status or `return !error(...)` for a status of 1.
 */
static int
error(const char *name, const char *msg)
{
	return (fprintf(stderr, "%s: %s\n", name, msg), 0);
}

/* Shortest run worth emitting as a dictionary reference; shorter runs are
 * stored as literals. Match lengths are encoded relative to this value. */
enum { MinMatchLength = 4 };

/* Working buffers shared by encode_ulz() and main():
 * raw holds the bytes read from the input file, mem receives the compressed
 * stream that main() writes out. */
char *raw, *mem;

/*
 * Search the dict_len bytes of history that end right before `in` for the
 * start position whose copy reproduces the longest prefix of `in`.
 *
 * in          current input position (history lies directly before it)
 * dict_len    number of history bytes available to search
 * string_len  maximum number of input bytes that may be matched
 * best        receives the start of the best match; untouched when the
 *             returned length is 0
 *
 * Returns the length of the best match (0 when nothing matches). On ties the
 * earliest (furthest back) candidate wins, and the search stops as soon as a
 * candidate covers the whole string.
 */
static int
ulz_longest_match(const unsigned char *in, int dict_len, int string_len, const unsigned char **best)
{
	/* Invariant: dict + dict_len == in for every candidate. */
	const unsigned char *dict = in - dict_len;
	int i, match_len = 0;
	for(; dict_len; dict++, dict_len--) {
		/* A copy that runs past the end of the history re-reads the bytes it
		 * has just produced, so the source repeats with period dict_len. */
		for(i = 0; i < string_len && in[i] == dict[i % dict_len]; i++)
			;
		if(i > match_len) {
			match_len = i;
			*best = dict;
			if(i == string_len)
				break; /* Cannot do better than matching everything */
		}
	}
	return match_len;
}

/*
 * Compress input_size bytes from `input` into `output` and return the number
 * of bytes written.
 *
 * Emitted stream, one token at a time:
 *   literal     control byte 0..127 (run length - 1) followed by the run bytes
 *   dictionary  0x80 | (len - 4)              when len - 4 <= 0x3F, or
 *               0xC0 | (len - 4) >> 8, then the low byte of (len - 4)
 *               followed by one offset byte: distance back from the current
 *               position minus 1 (so 0 means the previous byte)
 *
 * The caller owns `output` and must size it for the worst case, which for
 * incompressible data is input_size plus one control byte per started run of
 * 128 literals. A non-positive input_size produces no output and returns 0.
 */
int
uxn_lz_compress(void *output, const void *input, int input_size)
{
	int dict_len, match_len, string_len, match_ctl;
	unsigned char *out = output, *combine = NULL;
	const unsigned char *dict_best = NULL, *in = input, *start = in, *end;
	/* A negative size would place `end` before `in` and the loop below
	 * would never reach it. */
	if(input_size <= 0)
		return 0;
	end = in + input_size;
	while(in != end) {
		dict_len = (int)(in - start); /* History available so far */
		if(dict_len > 256)
			dict_len = 256; /* The offset is a single byte: look back at most 256 */
		string_len = (int)(end - in);
		if(string_len > 0x3FFF + MinMatchLength)
			string_len = 0x3FFF + MinMatchLength; /* Length field is 14 bits, biased by the minimum */
		match_len = ulz_longest_match(in, dict_len, string_len, &dict_best);
		if(match_len >= MinMatchLength) {
			match_ctl = match_len - MinMatchLength; /* Stored biased: 0 means 4, 1 means 5, ... */
			if(match_ctl > 0x3F) {
				*out++ = match_ctl >> 8 | 0x40 | 0x80; /* Both flags set, high bits of the length */
				*out++ = match_ctl;                    /* Low byte of the length */
			} else {
				*out++ = match_ctl | 0x80; /* Dictionary flag only, 6-bit length */
			}
			*out++ = in - dict_best - 1; /* Distance back, stored minus one */
			in += match_len;
			combine = NULL; /* The next literal must open a new run */
			continue;
		}
		if(combine) {
			/* Extend the open literal run; close it once it holds 128 bytes */
			if(++*combine == 127)
				combine = NULL;
		} else {
			/* Open a new run and remember its control byte; 0 means length 1 */
			combine = out++;
			*combine = 0;
		}
		*out++ = *in++; /* Copy one literal byte */
	}
	return (int)(out - (unsigned char *)output);
}

/*
 * Read all of `src` into the global `raw` buffer, compress it into the global
 * `mem` buffer and return the compressed size in bytes.
 *
 * The input buffer starts at 0x10000 bytes and doubles as needed, so inputs
 * of any practical size are accepted. The output buffer is sized for the
 * worst case of incompressible data: one extra control byte per started run
 * of 128 literals.
 *
 * On failure (allocation, read error, input too large) a message is printed
 * to stderr, `mem` is left NULL and 0 is returned. An empty input also
 * returns 0, but with `mem` non-NULL.
 */
int
encode_ulz(FILE *src)
{
	size_t cap = 0x10000, length = 0, got;
	char *grown;
	mem = NULL;
	if(!(raw = malloc(cap))) {
		fprintf(stderr, "encode_ulz: out of memory\n");
		return 0;
	}
	while((got = fread(raw + length, 1, cap - length, src)) > 0) {
		length += got;
		if(length < cap)
			continue;
		/* Buffer full: double it. The cap keeps the size within int range,
		 * since the compressor takes and returns int. */
		if(cap >= 0x40000000 || !(grown = realloc(raw, cap * 2))) {
			fprintf(stderr, "encode_ulz: input too large\n");
			return 0;
		}
		raw = grown;
		cap *= 2;
	}
	if(ferror(src)) {
		fprintf(stderr, "encode_ulz: read error\n");
		return 0;
	}
	if(!(mem = malloc(length + length / 128 + 1))) {
		fprintf(stderr, "encode_ulz: out of memory\n");
		return 0;
	}
	return uxn_lz_compress(mem, raw, (int)length);
}

int
main(int argc, char *argv[])
{
	int res;
	FILE *src, *dst;
	if(argv[1][0] == '-' && argv[1][1] == 'v')
		return !fprintf(stdout, "Ulzenc - ULZ Encoder, 15 Nov 2023.\n");
	if(argc != 3)
		return error("usage", "ulzenc [-v] a.bin b.ulz ");
	if(!(src = fopen(argv[1], "rb")))
		return !error("Invalid input file", argv[1]);
	res = encode_ulz(src);
	if(!(dst = fopen(argv[2], "wb")))
		return !error("Invalid output file", argv[1]);
	fwrite(mem, res, 1, dst);
	printf("Compressed %s -> %s(%d bytes).\n", argv[1], argv[2], res);
	return 0;
}
Starting encoding 2023-11-15 23:30:28 -05:00			`#include <stdio.h>`
			`#include <stdlib.h>`

Housekeeping 2023-11-16 23:53:01 -05:00			`/* cc ulzenc.c -o ulzenc && ./ulzenc a.bin b.ulz */`
Cleanup 2023-11-16 23:35:52 -05:00
Starting encoding 2023-11-15 23:30:28 -05:00			`static int`
			`error(const char *name, const char *msg)`
			`{`
			`fprintf(stderr, "%s: %s\n", name, msg);`
			`return 0;`
			`}`

Housekeeping 2023-11-16 23:53:01 -05:00			`enum { MinMatchLength = 4 };`

Cleanup 2023-11-16 23:35:52 -05:00			`char *raw, *mem;`
Starting encoding 2023-11-15 23:30:28 -05:00
			`int`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`uxn_lz_compress(void *output, const void *input, int input_size)`
Starting encoding 2023-11-15 23:30:28 -05:00			`{`
Cleanup 2023-11-16 23:35:52 -05:00			`int dict_len, match_len, i, string_len, match_ctl;`
			`unsigned char *out = output, *combine = 0;`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`const unsigned char *dict, *dict_best = 0, *in = input, *start = in, *end = in + input_size;`
Cleanup 2023-11-16 23:35:52 -05:00			`while(in != end) {`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`dict_len = (int)(in - start); /* Get available dictionary size (history of original output) */`
Cleanup 2023-11-16 23:35:52 -05:00			`if(dict_len > 256)`
			`dict_len = 256; /* Limit history lookback to 256 bytes */`
			`dict = in - dict_len; /* Start of dictionary */`
			`string_len = (int)(end - in); /* Size of the string to search for */`
			`if(string_len > 0x3FFF + MinMatchLength)`
			`string_len = 0x3FFF + MinMatchLength;`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`/* ^ Limit string length to what we can fit in 14 bits, plus the minimum`
			`* match length */`
			`match_len = 0; /* This will hold the length of our best match */`
			`for(; dict_len; dict += 1, dict_len -= 1) /* Iterate through the dictionary */`
Cleanup 2023-11-16 23:35:52 -05:00			`{`
			`for(i = 0;; i++) /* Find common prefix length with the string */`
			`{`
			`if(i == string_len) {`
			`match_len = i;`
			`dict_best = dict;`
			`goto done_search;`
			`}`
			`/* ^ If we reach the end of the string, this is the best possible match.`
			`* End. */`
			`if(in[i] != dict[i % dict_len])`
			`break; /* Dictionary repeats if we hit the end */`
			`}`
			`if(i > match_len) {`
			`match_len = i;`
			`dict_best = dict;`
			`}`
			`}`
			`done_search:`
			`if(match_len >= MinMatchLength) /* Long enough? Use dictionary match */`
			`{`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`match_ctl = match_len - MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */`
			`if(match_ctl > 0x3F) /* Match is long enough to use 2 bytes for the size */`
Cleanup 2023-11-16 23:35:52 -05:00			`{`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`*out++ = match_ctl >> 8 \| 0x40 \| 0x80; /* High byte of the size, with both flags set */`
			`*out++ = match_ctl; /* Low byte of the size */`
			`} else /* Use 1 byte for the size */`
Cleanup 2023-11-16 23:35:52 -05:00			`{`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`*out++ = match_ctl \| 0x80;`
			`} /* Set the "dictionary" flag */`
			`*out++ = in - dict_best - 1; /* Write offset into history. (0 is -1, 1 is -2, ...) */`
			`in += match_len; /* Advance input by size of the match */`
			`combine = 0; /* Disable combining previous literal, if any */`
Cleanup 2023-11-16 23:35:52 -05:00			`continue;`
			`}`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`/* Combine with previous literal */`
			`if(combine) {`
Cleanup 2023-11-16 23:35:52 -05:00			`if(++*combine == 127)`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`combine = 0;`
			`}`
			`/* Start a new literal */`
			`else {`
			`/* Store this address, and later use it to increment the literal size. */`
			`combine = out++;`
			`/* The 0 here means literal of length 1. */`
			`*combine = 0;`
Cleanup 2023-11-16 23:35:52 -05:00			`}`
			`*out++ = *in++; /* Write 1 literal byte from the input to the output. */`
			`}`
			`return (int)(out - (unsigned char *)output);`
Starting encoding 2023-11-15 23:30:28 -05:00			`}`

Cleaned up progress on encoder 2023-11-16 14:02:10 -05:00			`int`
			`encode_ulz(FILE *src)`
			`{`
Cleanup 2023-11-16 23:35:52 -05:00			`int length = 0;`
Let us work from the example instead.. 2023-11-16 23:47:50 -05:00			`mem = malloc(0x10000), raw = malloc(0x10000);`
			`while(fread(raw + length, 1, 1, src)) ++length;`
			`return uxn_lz_compress(mem, raw, length);`
Cleaned up progress on encoder 2023-11-16 14:02:10 -05:00			`}`

Starting encoding 2023-11-15 23:30:28 -05:00			`int`
			`main(int argc, char *argv[])`
			`{`
			`int res;`
			`FILE *src, *dst;`
			`if(argv[1][0] == '-' && argv[1][1] == 'v')`
			`return !fprintf(stdout, "Ulzenc - ULZ Encoder, 15 Nov 2023.\n");`
			`if(argc != 3)`
			`return error("usage", "ulzenc [-v] a.bin b.ulz ");`
			`if(!(src = fopen(argv[1], "rb")))`
			`return !error("Invalid input file", argv[1]);`
			`res = encode_ulz(src);`
			`if(!(dst = fopen(argv[2], "wb")))`
			`return !error("Invalid output file", argv[1]);`
Cleanup 2023-11-16 23:35:52 -05:00			`fwrite(mem, res, 1, dst);`
Starting encoding 2023-11-15 23:30:28 -05:00			`printf("Compressed %s -> %s(%d bytes).\n", argv[1], argv[2], res);`
			`return 0;`
			`}`