/* Source listing: C, 129 lines, 3.9 KiB. */
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
/* cc ulzenc.c -o ulzenc && ./ulzenc example.txt */
|
|
|
|
enum { MinMatchLength = 4 };
|
|
|
|
/*
 * Print a diagnostic of the form "<name>: <msg>" to stderr.
 *
 * Always returns 0, so callers can write `return !error(...)` to report the
 * problem and produce a non-zero exit status in a single statement.
 */
static int
error(const char *name, const char *msg)
{
	fprintf(stderr, "%s: %s\n", name, msg);
	return 0;
}
|
|
|
|
char *raw, *mem;
|
|
|
|
int
|
|
uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
|
|
{
|
|
int dict_len, match_len, i, string_len, match_ctl;
|
|
unsigned char *out = output, *combine = 0;
|
|
const unsigned char *dict, *dict_best = 0, *in = input, *start = in,
|
|
*end = in + input_size;
|
|
while(in != end) {
|
|
dict_len = (int)(in - start); /* Get available dictionary size (history of
|
|
original output) */
|
|
if(dict_len > 256)
|
|
dict_len = 256; /* Limit history lookback to 256 bytes */
|
|
dict = in - dict_len; /* Start of dictionary */
|
|
string_len = (int)(end - in); /* Size of the string to search for */
|
|
if(string_len > 0x3FFF + MinMatchLength)
|
|
string_len = 0x3FFF + MinMatchLength;
|
|
/* ^ Limit string length to what we can fit in 14 bits, plus the minimum
|
|
* match length */
|
|
match_len = 0; /* This will hold the length of our best match */
|
|
for(; dict_len;
|
|
dict += 1, dict_len -= 1) /* Iterate through the dictionary */
|
|
{
|
|
for(i = 0;; i++) /* Find common prefix length with the string */
|
|
{
|
|
if(i == string_len) {
|
|
match_len = i;
|
|
dict_best = dict;
|
|
goto done_search;
|
|
}
|
|
/* ^ If we reach the end of the string, this is the best possible match.
|
|
* End. */
|
|
if(in[i] != dict[i % dict_len])
|
|
break; /* Dictionary repeats if we hit the end */
|
|
}
|
|
if(i > match_len) {
|
|
match_len = i;
|
|
dict_best = dict;
|
|
}
|
|
}
|
|
done_search:
|
|
if(match_len >= MinMatchLength) /* Long enough? Use dictionary match */
|
|
{
|
|
if((output_size -= 2) < 0)
|
|
goto overflow;
|
|
match_ctl =
|
|
match_len -
|
|
MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */
|
|
if(match_ctl >
|
|
0x3F) /* Match is long enough to use 2 bytes for the size */
|
|
{
|
|
if((output_size -= 1) < 0)
|
|
goto overflow;
|
|
*out++ = match_ctl >> 8 | 0x40 |
|
|
0x80; /* High byte of the size, with both flags set */
|
|
*out++ = match_ctl; /* Low byte of the size */
|
|
} else /* Use 1 byte for the size */
|
|
{
|
|
*out++ = match_ctl | 0x80; /* Set the "dictionary" flag */
|
|
}
|
|
*out++ = in - dict_best -
|
|
1; /* Write offset into history. (0 is -1, 1 is -2, ...) */
|
|
in += match_len; /* Advance input by size of the match */
|
|
combine = 0; /* Disable combining previous literal, if any */
|
|
continue;
|
|
}
|
|
if(combine) /* Combine with previous literal */
|
|
{
|
|
if((output_size -= 1) < 0)
|
|
goto overflow;
|
|
if(++*combine == 127)
|
|
combine = 0; /* If the literal hits its size limit, terminate it. */
|
|
} else /* Start a new literal */
|
|
{
|
|
if((output_size -= 2) < 0)
|
|
goto overflow;
|
|
combine = out++; /* Store this address, and later use it to increment the
|
|
literal size. */
|
|
*combine = 0; /* The 0 here means literal of length 1. */
|
|
}
|
|
*out++ = *in++; /* Write 1 literal byte from the input to the output. */
|
|
}
|
|
return (int)(out - (unsigned char *)output);
|
|
overflow:
|
|
return -1;
|
|
}
|
|
|
|
int
|
|
encode_ulz(FILE *src)
|
|
{
|
|
int length = 0;
|
|
mem = malloc(1000000), raw = malloc(0x10000);
|
|
while(fread(raw + length, 1, 1, src) && ++length) {}
|
|
return uxn_lz_compress(mem, 1000000, raw, length);
|
|
}
|
|
|
|
int
|
|
main(int argc, char *argv[])
|
|
{
|
|
int res;
|
|
FILE *src, *dst;
|
|
if(argv[1][0] == '-' && argv[1][1] == 'v')
|
|
return !fprintf(stdout, "Ulzenc - ULZ Encoder, 15 Nov 2023.\n");
|
|
if(argc != 3)
|
|
return error("usage", "ulzenc [-v] a.bin b.ulz ");
|
|
if(!(src = fopen(argv[1], "rb")))
|
|
return !error("Invalid input file", argv[1]);
|
|
res = encode_ulz(src);
|
|
if(!(dst = fopen(argv[2], "wb")))
|
|
return !error("Invalid output file", argv[1]);
|
|
fwrite(mem, res, 1, dst);
|
|
printf("Compressed %s -> %s(%d bytes).\n", argv[1], argv[2], res);
|
|
return 0;
|
|
} |