Let us work from the example instead..

This commit is contained in:
neauoire 2023-11-16 20:47:50 -08:00
parent f39247c707
commit 6df064d8cf
4 changed files with 31 additions and 336 deletions

View File

@ -9,10 +9,9 @@ ASM="uxncli $HOME/roms/drifblim.rom"
if [[ "$*" == *"--lint"* ]] if [[ "$*" == *"--lint"* ]]
then then
$LIN decoder.tal
clang-format -i lz_main.c
clang-format -i ulzdec.c clang-format -i ulzdec.c
clang-format -i ulzenc.c clang-format -i ulzenc.c
$LIN decoder.tal
fi fi
# Building # Building

View File

@ -1,20 +0,0 @@
/* These functions return -1 if there wasn't enough space in output.
* LZDecompress can also return -1 if the input data was malformed,
* Returns the number of bytes written to output on success. */
int uxn_lz_compress(void *output, int output_size, const void *input, int input_size);
int uxn_lz_expand(void *output, int output_size, const void *input, int input_size);
struct uxn_lz_expand_t {
unsigned char *next_in, *next_out;
int avail_in, avail_out;
int dict_len, copy_num, state;
unsigned char dict_read_pos, dict_write_pos, dict[256];
};
int uxn_lz_expand_stream(struct uxn_lz_expand_t *a);
#define UXN_CHECKSUM_SEED 0x1234ABCD
unsigned int uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size);

View File

@ -1,268 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include "lz.h"
enum { MinMatchLength = 4 };
int
uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
{
int dict_len, match_len, i, string_len, match_ctl;
unsigned char *out = output, *combine = 0;
const unsigned char *dict, *dict_best = 0, *in = input, *start = in,
*end = in + input_size;
while(in != end) {
dict_len = (int)(in - start); /* Get available dictionary size (history of
original output) */
if(dict_len > 256)
dict_len = 256; /* Limit history lookback to 256 bytes */
dict = in - dict_len; /* Start of dictionary */
string_len = (int)(end - in); /* Size of the string to search for */
if(string_len > 0x3FFF + MinMatchLength)
string_len = 0x3FFF + MinMatchLength;
/* ^ Limit string length to what we can fit in 14 bits, plus the minimum
* match length */
match_len = 0; /* This will hold the length of our best match */
for(; dict_len;
dict += 1, dict_len -= 1) /* Iterate through the dictionary */
{
for(i = 0;; i++) /* Find common prefix length with the string */
{
if(i == string_len) {
match_len = i;
dict_best = dict;
goto done_search;
}
/* ^ If we reach the end of the string, this is the best possible match.
* End. */
if(in[i] != dict[i % dict_len])
break; /* Dictionary repeats if we hit the end */
}
if(i > match_len) {
match_len = i;
dict_best = dict;
}
}
done_search:
if(match_len >= MinMatchLength) /* Long enough? Use dictionary match */
{
if((output_size -= 2) < 0)
goto overflow;
match_ctl =
match_len -
MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */
if(match_ctl >
0x3F) /* Match is long enough to use 2 bytes for the size */
{
if((output_size -= 1) < 0)
goto overflow;
*out++ = match_ctl >> 8 | 0x40 |
0x80; /* High byte of the size, with both flags set */
*out++ = match_ctl; /* Low byte of the size */
} else /* Use 1 byte for the size */
{
*out++ = match_ctl | 0x80; /* Set the "dictionary" flag */
}
*out++ = in - dict_best -
1; /* Write offset into history. (0 is -1, 1 is -2, ...) */
in += match_len; /* Advance input by size of the match */
combine = 0; /* Disable combining previous literal, if any */
continue;
}
if(combine) /* Combine with previous literal */
{
if((output_size -= 1) < 0)
goto overflow;
if(++*combine == 127)
combine = 0; /* If the literal hits its size limit, terminate it. */
} else /* Start a new literal */
{
if((output_size -= 2) < 0)
goto overflow;
combine = out++; /* Store this address, and later use it to increment the
literal size. */
*combine = 0; /* The 0 here means literal of length 1. */
}
*out++ = *in++; /* Write 1 literal byte from the input to the output. */
}
return (int)(out - (unsigned char *)output);
overflow:
return -1;
}
int
uxn_lz_expand(void *output, int output_size, const void *input, int input_size)
{
int num, offset, written = 0;
unsigned char *out = output;
const unsigned char *from, *in = input;
while(input_size) {
num = *in++;
if(num > 127) /* Dictionary */
{
if((input_size -= 1) < 0)
goto malformed;
num &= 0x7F;
if(num & 0x40) {
if((input_size -= 1) < 0)
goto malformed;
num = *in++ | num << 8 & 0x3FFF;
}
num += MinMatchLength;
offset = *in++ + 1;
if(offset > written)
goto malformed;
from = out + written - offset;
} else /* Literal */
{
input_size -= ++num;
if(input_size < 0)
goto malformed;
from = in, in += num;
}
if(written + num > output_size)
goto overflow;
while(num--)
out[written++] = *from++;
}
return written;
overflow:
malformed:
return -1;
}
int
uxn_lz_expand_stream(struct uxn_lz_expand_t *a)
{
/* Copy struct to stack variables for compiler optimizations */
unsigned char *next_in = a->next_in, *next_out = a->next_out;
int avail_in = a->avail_in, avail_out = a->avail_out;
int dict_len = a->dict_len, copy_num = a->copy_num;
unsigned char dict_read_pos = a->dict_read_pos,
dict_write_pos = a->dict_write_pos, *dict = a->dict;
int result = 0;
switch(a->state) {
case 0:
for(; avail_in;) {
copy_num = *next_in++;
avail_in--;
if(copy_num > 127) /* Dictionary */
{
copy_num &= 0x7F;
if(copy_num & 0x40) {
case 1:
if(!avail_in) {
a->state = 1;
goto need_more;
}
avail_in--;
copy_num = *next_in++ | copy_num << 8 & 0x3FFF;
}
copy_num += MinMatchLength;
case 2:
if(!avail_in) {
a->state = 2;
goto need_more;
}
avail_in--;
dict_read_pos = *next_in++ + 1;
if(dict_read_pos > dict_len) {
a->state = 5;
result = -1;
goto flush;
} /* Malformed */
dict_read_pos = dict_write_pos - dict_read_pos;
if((dict_len += copy_num) > 256)
dict_len = 256;
case 3:
do {
if(!avail_out) {
a->state = 3;
goto need_more;
}
*next_out++ = dict[dict_write_pos++] = dict[dict_read_pos++];
avail_out--;
} while(--copy_num);
} else /* Literal */
{
copy_num++;
if((dict_len += copy_num) > 256)
dict_len = 256;
case 4:
do {
if(!avail_in || !avail_out) {
a->state = 4;
goto need_more;
}
*next_out++ = dict[dict_write_pos++] = *next_in++;
avail_in--, avail_out--;
} while(--copy_num);
}
}
a->state = 0;
case 5:;
}
need_more:
flush:
/* Flush stack variables back to struct */
a->next_in = next_in, a->next_out = next_out;
a->avail_in = avail_in, a->avail_out = avail_out;
a->dict_len = dict_len, a->copy_num = copy_num;
a->dict_read_pos = dict_read_pos, a->dict_write_pos = dict_write_pos;
return result;
}
unsigned int
uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size)
{
unsigned int x = seed >> 16, y = seed, c;
unsigned char *in = bytes, *end = in + bytes_size;
for(; in != end; in++) {
c = *in << 8 | *in;
x = x * 0x2443 + c;
y = y * 0x118d + c;
}
return x << 16 | (y & 0xFFFF);
}
/* cc lz_main.c -o main && ./main */
int
main(int argc, char *argv[])
{
char *s;
char *my_byte_buffer = malloc(1000000);
FILE *fp = fopen("example.txt", "rb");
char buffer[1000000];
size_t i;
for(i = 0; i < 1000000; ++i) {
int c = getc(fp);
if(c == EOF) {
buffer[i] = 0x00;
break;
}
buffer[i] = c;
}
int res = uxn_lz_compress(my_byte_buffer, 1000000, &buffer, i);
if(res < 0)
printf("ERROR\n");
printf("!!!%d -> %d\n", (int)i, (int)res);
FILE *out_file;
out_file = fopen("a.ulz", "wb");
fwrite(my_byte_buffer, 1, res, out_file);
fclose(out_file);
/* Other way */
char *output2 = malloc(1000000);
int res2 = uxn_lz_expand(output2, 1000000, &buffer, i);
printf("!!!%d -> %d\n", (int)res, (int)res2);
return 0;
}

View File

@ -15,26 +15,23 @@ error(const char *name, const char *msg)
char *raw, *mem; char *raw, *mem;
int int
uxn_lz_compress(void *output, int output_size, const void *input, int input_size) uxn_lz_compress(void *output, const void *input, int input_size)
{ {
int dict_len, match_len, i, string_len, match_ctl; int dict_len, match_len, i, string_len, match_ctl;
unsigned char *out = output, *combine = 0; unsigned char *out = output, *combine = 0;
const unsigned char *dict, *dict_best = 0, *in = input, *start = in, const unsigned char *dict, *dict_best = 0, *in = input, *start = in, *end = in + input_size;
*end = in + input_size;
while(in != end) { while(in != end) {
dict_len = (int)(in - start); /* Get available dictionary size (history of dict_len = (int)(in - start); /* Get available dictionary size (history of original output) */
original output) */
if(dict_len > 256) if(dict_len > 256)
dict_len = 256; /* Limit history lookback to 256 bytes */ dict_len = 256; /* Limit history lookback to 256 bytes */
dict = in - dict_len; /* Start of dictionary */ dict = in - dict_len; /* Start of dictionary */
string_len = (int)(end - in); /* Size of the string to search for */ string_len = (int)(end - in); /* Size of the string to search for */
if(string_len > 0x3FFF + MinMatchLength) if(string_len > 0x3FFF + MinMatchLength)
string_len = 0x3FFF + MinMatchLength; string_len = 0x3FFF + MinMatchLength;
/* ^ Limit string length to what we can fit in 14 bits, plus the minimum /* ^ Limit string length to what we can fit in 14 bits, plus the minimum
* match length */ * match length */
match_len = 0; /* This will hold the length of our best match */ match_len = 0; /* This will hold the length of our best match */
for(; dict_len; for(; dict_len; dict += 1, dict_len -= 1) /* Iterate through the dictionary */
dict += 1, dict_len -= 1) /* Iterate through the dictionary */
{ {
for(i = 0;; i++) /* Find common prefix length with the string */ for(i = 0;; i++) /* Find common prefix length with the string */
{ {
@ -56,57 +53,44 @@ uxn_lz_compress(void *output, int output_size, const void *input, int input_size
done_search: done_search:
if(match_len >= MinMatchLength) /* Long enough? Use dictionary match */ if(match_len >= MinMatchLength) /* Long enough? Use dictionary match */
{ {
if((output_size -= 2) < 0) match_ctl = match_len - MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */
goto overflow; if(match_ctl > 0x3F) /* Match is long enough to use 2 bytes for the size */
match_ctl =
match_len -
MinMatchLength; /* More numeric range: treat 0 as 4, 1 as 5, etc. */
if(match_ctl >
0x3F) /* Match is long enough to use 2 bytes for the size */
{ {
if((output_size -= 1) < 0) *out++ = match_ctl >> 8 | 0x40 | 0x80; /* High byte of the size, with both flags set */
goto overflow; *out++ = match_ctl; /* Low byte of the size */
*out++ = match_ctl >> 8 | 0x40 | } else /* Use 1 byte for the size */
0x80; /* High byte of the size, with both flags set */
*out++ = match_ctl; /* Low byte of the size */
} else /* Use 1 byte for the size */
{ {
*out++ = match_ctl | 0x80; /* Set the "dictionary" flag */ *out++ = match_ctl | 0x80;
} } /* Set the "dictionary" flag */
*out++ = in - dict_best - *out++ = in - dict_best - 1; /* Write offset into history. (0 is -1, 1 is -2, ...) */
1; /* Write offset into history. (0 is -1, 1 is -2, ...) */ in += match_len; /* Advance input by size of the match */
in += match_len; /* Advance input by size of the match */ combine = 0; /* Disable combining previous literal, if any */
combine = 0; /* Disable combining previous literal, if any */
continue; continue;
} }
if(combine) /* Combine with previous literal */ /* Combine with previous literal */
{ if(combine) {
if((output_size -= 1) < 0)
goto overflow;
if(++*combine == 127) if(++*combine == 127)
combine = 0; /* If the literal hits its size limit, terminate it. */ combine = 0;
} else /* Start a new literal */ }
{ /* Start a new literal */
if((output_size -= 2) < 0) else {
goto overflow; /* Store this address, and later use it to increment the literal size. */
combine = out++; /* Store this address, and later use it to increment the combine = out++;
literal size. */ /* The 0 here means literal of length 1. */
*combine = 0; /* The 0 here means literal of length 1. */ *combine = 0;
} }
*out++ = *in++; /* Write 1 literal byte from the input to the output. */ *out++ = *in++; /* Write 1 literal byte from the input to the output. */
} }
return (int)(out - (unsigned char *)output); return (int)(out - (unsigned char *)output);
overflow:
return -1;
} }
int int
encode_ulz(FILE *src) encode_ulz(FILE *src)
{ {
int length = 0; int length = 0;
mem = malloc(1000000), raw = malloc(0x10000); mem = malloc(0x10000), raw = malloc(0x10000);
while(fread(raw + length, 1, 1, src) && ++length) {} while(fread(raw + length, 1, 1, src)) ++length;
return uxn_lz_compress(mem, 1000000, raw, length); return uxn_lz_compress(mem, raw, length);
} }
int int