uxn-utils/cli/lz/ulzenc.c

139 lines
3.5 KiB
C
Raw Normal View History

2023-11-15 23:30:28 -05:00
#include <stdio.h>
#include <stdlib.h>
2023-11-16 23:53:01 -05:00
/* cc ulzenc.c -o ulzenc && ./ulzenc a.bin b.ulz */
2023-11-16 23:35:52 -05:00
2023-11-15 23:30:28 -05:00
/*
 * Print a diagnostic of the form "name: msg" followed by a newline on stderr.
 * Always returns 0, so callers can write `return error(...)` for a zero
 * status or `return !error(...)` for a non-zero one.
 */
static int
error(const char *name, const char *msg)
{
	const int status = 0;

	(void)fprintf(stderr, "%s: %s\n", name, msg);
	return status;
}
2023-11-16 23:53:01 -05:00
enum {
	MinMatchLength = 4,  /* Shortest match that is encoded as a copy command */
	MaxDictLength = 256, /* History window; the copy offset is stored in a single byte */
	MaxMatchCtl = 0x3FFF /* Largest encodable (length - MinMatchLength): 14 bits */
};

/* raw: input buffer filled by encode_ulz. mem: output buffer written by uxn_lz_compress. */
unsigned char *raw, *mem;

/*
 * Search the dict_len bytes of history that end at `in` for the longest
 * prefix of the string_len bytes that start at `in`.
 *
 * A candidate at distance d from `in` is compared as if it repeated with
 * period d (index taken modulo d), so a match may run past `in` itself.
 * Candidates are tried from the farthest to the nearest; only a strictly
 * longer match replaces the current best, and the search stops early once
 * the whole string is matched, since nothing can beat that.
 *
 * Returns the best match length (0 if none) and stores its start in *best.
 * *best is left untouched when the result is 0.
 */
static int
ulz_find_match(const unsigned char *in, int dict_len, int string_len, const unsigned char **best)
{
	int i, match_len = 0;
	const unsigned char *dict = in - dict_len;

	/* Invariant: dict + dict_len == in, so dict[i % dict_len] never reads at or past `in`. */
	for(; dict_len; dict++, dict_len--) {
		for(i = 0; i < string_len && in[i] == dict[i % dict_len]; i++)
			;
		if(i > match_len) {
			match_len = i;
			*best = dict;
			if(i == string_len)
				break;
		}
	}
	return match_len;
}

/*
 * Compress input_size bytes from `input` into the global buffer `mem` and
 * return the number of bytes written.
 *
 * Output is a sequence of commands:
 *   LIT   one control byte holding (count - 1), 0..127, followed by `count`
 *         literal bytes.
 *   CPY1  control byte 0x80 | (length - MinMatchLength) for values up to
 *         0x3F, followed by one offset byte.
 *   CPY2  control bytes 0xC0 | high bits and low byte of
 *         (length - MinMatchLength), followed by one offset byte.
 * The offset byte stores (distance - 1), so 0 refers to the previous byte.
 *
 * The caller must point `mem` at a buffer with room for the worst case:
 * all-literal output costs one extra control byte per 128 input bytes, i.e.
 * input_size + (input_size + 127) / 128 bytes.
 *
 * Returns 0 without writing anything when `mem` or `input` is NULL or
 * input_size is not positive.
 */
int
uxn_lz_compress(const void *input, int input_size)
{
	int dict_len, string_len, match_len, match_ctl;
	unsigned char *out = mem, *combine = NULL;
	const unsigned char *in = input, *start = in, *end, *dict_best = NULL;

	if(!mem || !input || input_size <= 0)
		return 0;
	end = in + input_size;

	while(in != end) {
		/* Available dictionary: already-consumed input, capped to the window. */
		dict_len = (int)(in - start);
		if(dict_len > MaxDictLength)
			dict_len = MaxDictLength;
		/* Remaining input, capped to the longest length a CPY2 can express. */
		string_len = (int)(end - in);
		if(string_len > MaxMatchCtl + MinMatchLength)
			string_len = MaxMatchCtl + MinMatchLength;

		match_len = ulz_find_match(in, dict_len, string_len, &dict_best);

		if(match_len >= MinMatchLength) {
			/* CPY: lengths are biased so that 0 means MinMatchLength. */
			match_ctl = match_len - MinMatchLength;
			if(match_ctl > 0x3F) {
				*out++ = (unsigned char)(match_ctl >> 8 | 0xc0); /* High bits of the length, both flags set */
				*out++ = (unsigned char)match_ctl;               /* Low byte of the length */
			} else {
				*out++ = (unsigned char)(match_ctl | 0x80);
			}
			*out++ = (unsigned char)(in - dict_best - 1); /* Offset: 0 is -1, 1 is -2, ... */
			in += match_len;
			combine = NULL; /* A copy ends the current literal run */
			continue;
		}

		/* LIT: extend the open literal run, or start a new one. */
		if(combine) {
			/* A control value of 127 (128 literals) is the largest allowed; close the run. */
			if(++*combine == 127)
				combine = NULL;
		} else {
			/* Remember the control byte so later literals can bump its count. */
			combine = out++;
			*combine = 0; /* 0 means a literal run of length 1 */
		}
		*out++ = *in++;
	}
	return (int)(out - mem);
}
2023-11-16 14:02:10 -05:00
int
encode_ulz(FILE *src)
{
2023-11-16 23:35:52 -05:00
int length = 0;
2023-11-16 23:47:50 -05:00
mem = malloc(0x10000), raw = malloc(0x10000);
while(fread(raw + length, 1, 1, src)) ++length;
2023-11-18 14:16:27 -05:00
return uxn_lz_compress(raw, length);
2023-11-16 14:02:10 -05:00
}
2023-11-15 23:30:28 -05:00
int
main(int argc, char *argv[])
{
int res;
FILE *src, *dst;
if(argv[1][0] == '-' && argv[1][1] == 'v')
return !fprintf(stdout, "Ulzenc - ULZ Encoder, 15 Nov 2023.\n");
if(argc != 3)
return error("usage", "ulzenc [-v] a.bin b.ulz ");
if(!(src = fopen(argv[1], "rb")))
return !error("Invalid input file", argv[1]);
res = encode_ulz(src);
if(!(dst = fopen(argv[2], "wb")))
return !error("Invalid output file", argv[1]);
2023-11-16 23:35:52 -05:00
fwrite(mem, res, 1, dst);
2023-11-15 23:30:28 -05:00
printf("Compressed %s -> %s(%d bytes).\n", argv[1], argv[2], res);
return 0;
}