(lz) example project

This commit is contained in:
neauoire 2023-11-14 18:54:31 -08:00
parent 7528540caf
commit 6080a8f68d
5 changed files with 431 additions and 0 deletions

22
cli/lz/.clang-format Normal file
View File

@ -0,0 +1,22 @@
AlignAfterOpenBracket: DontAlign
AlignEscapedNewlines: DontAlign
AlignOperands: DontAlign
AllowShortBlocksOnASingleLine: Always
AllowShortCaseLabelsOnASingleLine: true
AllowShortEnumsOnASingleLine: true
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: TopLevel
BreakBeforeTernaryOperators: false
BinPackArguments: false
BinPackParameters: false
BreakBeforeBraces: WebKit
IndentCaseLabels: false
TabWidth: 4
IndentWidth: 4
ContinuationIndentWidth: 4
UseTab: ForContinuationAndIndentation
ColumnLimit: 0
ReflowComments: false
SortIncludes: false
SpaceBeforeParens: false

86
cli/lz/example.txt Normal file
View File

@ -0,0 +1,86 @@
Yo, listen up here's a story
About a little guy
That lives in a blue world
And all day and all night
And everything he sees is just blue
Like him inside and outside
Blue his house
With a blue little window
And a blue corvette
And everything is blue for him
And himself and everybody around
Cause he ain't got nobody to listen to
I'm blue
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
I'm blue
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
I have a blue house
With a blue window
Blue is the colour of all that I wear
Blue are the streets
And all the trees are too
I have a girlfriend and she is so blue
Blue are the people here
That walk around
Blue like my corvette its in and outside
Blue are the words I say
And what I think
Blue are the feelings
That live inside me
I'm blue
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
I'm blue
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
I have a blue house
With a blue window
Blue is the colour of all that I wear
Blue are the streets
And all the trees are too
I have a girlfriend and she is so blue
Blue are the people here
That walk around
Blue like my corvette, its in and outside
Blue are the words I say
And what I think
Blue are the feelings
That live inside me
I'm blue
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
I'm blue
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di
Da ba dee da ba di

56
cli/lz/explanation.md Normal file
View File

@ -0,0 +1,56 @@
Simple Uxn LZ Format
====================
Goals:
* Anyone can implement it
* Small source code size
* Easy to implement from Uxn
* Mildly better than RLE
Non-goals:
* High compression ratio
* High compression speed
Format
------
It's a stream of commands. The first byte encodes the first command. Read the commands from the input until there's no more input.
There are two commands: literal and dictionary.
```
                  Byte 1                Byte 2+n
                  ┌─────────────────┐   ┌─────
Literal           │ 0 x x x x x x x │   │ ....
(Always 1 byte)   └─────────────────┘   └─────
                  Length of literal     Bytes to copy to output
                  (Adjust by adding 1)

                  Byte 1                Byte 2
Dictionary        ┌─────────────────┐   ┌─────────────────┐
(2 bytes version) │ 1 0 x x x x x x │   │ x x x x x x x x │
                  └─────────────────┘   └─────────────────┘
                  Length of             Offset into
                  dictionary match      dictionary
                  (Adjust by adding 4)  (Adjust by adding 1)

                  Byte 1            Byte 2              Byte 3
Dictionary        ┌─────────────────┬─────────────────┐ ┌─────────────────┐
(3 bytes version) │ 1 1 x x x x x x │ x x x x x x x x │ │ x x x x x x x x │
                  └─────────────────┴─────────────────┘ └─────────────────┘
                  Length of dictionary match            Offset into
                  (Adjust by adding 4)                  dictionary
                                                        (Adjust by adding 1)
```
* The maximum dictionary history size is 256 bytes.
* Dictionary offsets should be treated as the distance back from the end of the last byte that was output.
* Example: an offset of 0 means go back by 1 byte into the history.
* `a b c d e f|g`
* Example: an offset of 5 means go back by 6 bytes into the history.
* `a|b c d e f g`

1
cli/lz/in.txt Normal file
View File

@ -0,0 +1 @@
abracadabra

266
cli/lz/lz_main.c Normal file
View File

@ -0,0 +1,266 @@
#include <stdio.h>
#include <stdlib.h>
#include "lz.h"
enum { MinMatchLength = 4 };

/*
 * Compress `input_size` bytes from `input` into `output`.
 *
 * The encoder walks the input once. At every position it searches the
 * previous (at most 256) input bytes for the longest run that matches the
 * upcoming bytes; a run is allowed to wrap around the end of that window,
 * which is how repeated patterns are encoded. Matches of at least
 * MinMatchLength bytes become dictionary commands, everything else is
 * appended to a literal command of up to 128 bytes.
 *
 * Returns the number of bytes written to `output`, or -1 when
 * `output_size` bytes are not enough to hold the result.
 */
int
uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
{
	const unsigned char *src = input, *base = src, *limit = src + input_size;
	const unsigned char *window, *best = 0;
	unsigned char *dst = output, *literal_ctl = 0;
	int window_len, want, best_len, n, ctl;

	while(src != limit) {
		/* History available for matching: at most the last 256 bytes. */
		window_len = (int)(src - base);
		if(window_len > 256)
			window_len = 256;
		window = src - window_len;

		/* Longest match we can encode: 14 bits plus the implicit minimum. */
		want = (int)(limit - src);
		if(want > 0x3FFF + MinMatchLength)
			want = 0x3FFF + MinMatchLength;

		/* Try every start position in the window, oldest first. */
		best_len = 0;
		while(window_len) {
			/* Common prefix length; the window repeats once exhausted. */
			n = 0;
			while(n != want && src[n] == window[n % window_len])
				n++;
			if(n == want) {
				/* Cannot do better than matching everything we wanted. */
				best_len = n;
				best = window;
				break;
			}
			if(n > best_len) {
				best_len = n;
				best = window;
			}
			window++;
			window_len--;
		}

		if(best_len >= MinMatchLength) {
			/* Dictionary command: stored length is biased by MinMatchLength. */
			ctl = best_len - MinMatchLength;
			if(ctl > 0x3F) {
				/* 3-byte form: both flag bits set, 14-bit length. */
				if((output_size -= 3) < 0)
					return -1;
				*dst++ = ctl >> 8 | 0x40 | 0x80;
				*dst++ = ctl;
			} else {
				/* 2-byte form: only the "dictionary" flag, 6-bit length. */
				if((output_size -= 2) < 0)
					return -1;
				*dst++ = ctl | 0x80;
			}
			/* Stored offset 0 means "one byte back", 1 means two, ... */
			*dst++ = src - best - 1;
			src += best_len;
			/* A match ends any literal run in progress. */
			literal_ctl = 0;
			continue;
		}

		if(literal_ctl) {
			/* Extend the open literal; close it once it is full. */
			if((output_size -= 1) < 0)
				return -1;
			if(++*literal_ctl == 127)
				literal_ctl = 0;
		} else {
			/* Open a new literal; a stored 0 means a length of 1. */
			if((output_size -= 2) < 0)
				return -1;
			literal_ctl = dst++;
			*literal_ctl = 0;
		}
		*dst++ = *src++;
	}
	return (int)(dst - (unsigned char *)output);
}
/*
 * Expand a complete compressed stream of `input_size` bytes from `input`
 * into `output`, which can hold `output_size` bytes.
 *
 * Each command starts with one control byte:
 *   - 0x00..0x7F: literal; (control + 1) bytes follow and are copied.
 *   - 0x80..0xBF: dictionary match, 6-bit length, then one offset byte.
 *   - 0xC0..0xFF: dictionary match, 14-bit length (low 6 bits of the
 *     control byte plus one more byte), then one offset byte.
 * Match lengths are biased by MinMatchLength and offsets by 1.
 *
 * Returns the number of bytes written, or -1 if the stream is truncated,
 * refers to data before the start of the output, or does not fit in
 * `output_size`.
 */
int
uxn_lz_expand(void *output, int output_size, const void *input, int input_size)
{
	int num, offset, written = 0;
	unsigned char *out = output;
	const unsigned char *from, *in = input;
	while(input_size > 0) {
		num = *in++;
		/* Account for the control byte itself, so that the loop stops
		 * exactly at the end of the input instead of reading past it. */
		input_size -= 1;
		if(num > 127) /* Dictionary */
		{
			num &= 0x7F;
			if(num & 0x40) {
				/* Long form: one extra byte carries the low 8 length bits. */
				if((input_size -= 1) < 0)
					goto malformed;
				num = *in++ | ((num << 8) & 0x3FFF);
			}
			/* The offset byte must be present. */
			if((input_size -= 1) < 0)
				goto malformed;
			num += MinMatchLength;
			offset = *in++ + 1;
			/* Cannot reach back further than what has been produced. */
			if(offset > written)
				goto malformed;
			from = out + written - offset;
		} else /* Literal */
		{
			input_size -= ++num;
			if(input_size < 0)
				goto malformed;
			from = in, in += num;
		}
		if(written + num > output_size)
			goto overflow;
		/* Byte-by-byte on purpose: a match may overlap the bytes it is
		 * producing, which is how repeated patterns are expanded. */
		while(num--)
			out[written++] = *from++;
	}
	return written;
overflow:
malformed:
	return -1;
}
/*
 * Resumable expander: consumes up to a->avail_in bytes from a->next_in and
 * produces up to a->avail_out bytes at a->next_out, updating those fields
 * (and the rest of the saved state in *a) before returning.
 *
 * The function can stop in the middle of a command when it runs out of
 * input or output space; a->state records where to resume:
 *   0  expecting a control byte
 *   1  expecting the second length byte of a long dictionary command
 *   2  expecting the offset byte of a dictionary command
 *   3  copying a dictionary match (a->copy_num bytes left)
 *   4  copying a literal (a->copy_num bytes left)
 *   5  malformed input was seen; further calls do nothing
 *
 * History is kept in a->dict, indexed by unsigned char positions that wrap
 * at 256 (NOTE(review): this presumes a->dict holds at least 256 bytes --
 * confirm against the struct declaration in lz.h).
 *
 * Returns 0 normally (including when more input or output is needed) and
 * -1 on the call that detects a dictionary offset reaching back further
 * than the data produced so far; later calls in state 5 return 0.
 */
int
uxn_lz_expand_stream(struct uxn_lz_expand_t *a)
{
	/* Copy struct to stack variables for compiler optimizations */
	unsigned char *next_in = a->next_in, *next_out = a->next_out;
	int avail_in = a->avail_in, avail_out = a->avail_out;
	int dict_len = a->dict_len, copy_num = a->copy_num;
	unsigned char dict_read_pos = a->dict_read_pos,
				  dict_write_pos = a->dict_write_pos, *dict = a->dict;
	int offset, result = 0;
	/* The case labels sit inside the loop body so that a resumed call
	 * jumps straight back to the point where the previous call stopped. */
	switch(a->state) {
	case 0:
		for(; avail_in;) {
			copy_num = *next_in++;
			avail_in--;
			if(copy_num > 127) /* Dictionary */
			{
				copy_num &= 0x7F;
				if(copy_num & 0x40) {
				case 1:
					if(!avail_in) {
						a->state = 1;
						goto need_more;
					}
					avail_in--;
					/* Low 6 bits of the first byte are the high length bits. */
					copy_num = *next_in++ | ((copy_num << 8) & 0x3FFF);
				}
				copy_num += MinMatchLength;
			case 2:
				if(!avail_in) {
					a->state = 2;
					goto need_more;
				}
				avail_in--;
				/* The adjusted offset is 1..256. Keep it in an int: stored
				 * in an unsigned char, 256 would wrap to 0 and slip past
				 * the range check below. */
				offset = *next_in++ + 1;
				if(offset > dict_len) {
					a->state = 5;
					result = -1;
					goto flush;
				} /* Malformed */
				/* Positions wrap modulo 256 when stored back. */
				dict_read_pos = dict_write_pos - offset;
				if((dict_len += copy_num) > 256)
					dict_len = 256;
			case 3:
				do {
					if(!avail_out) {
						a->state = 3;
						goto need_more;
					}
					/* Emit the byte and record it in the history. */
					*next_out++ = dict[dict_write_pos++] = dict[dict_read_pos++];
					avail_out--;
				} while(--copy_num);
			} else /* Literal */
			{
				copy_num++;
				if((dict_len += copy_num) > 256)
					dict_len = 256;
			case 4:
				do {
					if(!avail_in || !avail_out) {
						a->state = 4;
						goto need_more;
					}
					/* Emit the byte and record it in the history. */
					*next_out++ = dict[dict_write_pos++] = *next_in++;
					avail_in--, avail_out--;
				} while(--copy_num);
			}
		}
		/* All input consumed on a command boundary. */
		a->state = 0;
	case 5:;
	}
need_more:
flush:
	/* Flush stack variables back to struct */
	a->next_in = next_in, a->next_out = next_out;
	a->avail_in = avail_in, a->avail_out = avail_out;
	a->dict_len = dict_len, a->copy_num = copy_num;
	a->dict_read_pos = dict_read_pos, a->dict_write_pos = dict_write_pos;
	return result;
}
/*
 * Fold `bytes_size` bytes starting at `bytes` into a checksum.
 *
 * `seed` supplies the starting state: its upper bits (seed >> 16) seed the
 * high accumulator and the whole value seeds the low accumulator. Each
 * input byte is duplicated into both halves of a 16-bit word, and each
 * accumulator is multiplied by its own constant before the word is added.
 * The result packs the high accumulator shifted left by 16 together with
 * the low 16 bits of the low accumulator.
 */
unsigned int
uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size)
{
	const unsigned char *p = bytes;
	unsigned int hi = seed >> 16, lo = seed, word, k;
	for(k = 0; k < bytes_size; k++) {
		word = (unsigned int)p[k] << 8 | p[k];
		hi = hi * 0x2443 + word;
		lo = lo * 0x118d + word;
	}
	return hi << 16 | (lo & 0xFFFF);
}
/*
 * Demo driver: reads up to BufferSize bytes of "example.txt" (dumping each
 * byte as hex), compresses them with uxn_lz_compress, expands the
 * compressed bytes again with uxn_lz_expand, and prints both size
 * transitions followed by the recovered data.
 *
 * Returns 0 on success and 1 if a buffer cannot be allocated, the file
 * cannot be opened, or either codec call reports an error.
 */
int
main(int argc, char *argv[])
{
	enum { BufferSize = 1000000 };
	/* Heap buffers: three 1 MB arrays are too large to put on the stack. */
	unsigned char *plain = malloc(BufferSize);
	unsigned char *packed = malloc(BufferSize);
	unsigned char *unpacked = malloc(BufferSize);
	FILE *fp = NULL;
	size_t i = 0;
	int c, res, res2, status = 1;
	(void)argc;
	(void)argv;
	if(!plain || !packed || !unpacked) {
		fprintf(stderr, "out of memory\n");
		goto done;
	}
	fp = fopen("example.txt", "rb");
	if(!fp) {
		perror("example.txt");
		goto done;
	}
	for(i = 0; i < BufferSize; ++i) {
		c = getc(fp);
		if(c == EOF)
			break;
		plain[i] = (unsigned char)c;
		printf("%02x\n", c);
	}
	res = uxn_lz_compress(packed, BufferSize, plain, (int)i);
	if(res < 0) {
		printf("ERROR\n");
		goto done;
	}
	printf("!!!%zu -> %d\n", i, res);
	/* Other way: expand the compressed bytes, not the original input. */
	res2 = uxn_lz_expand(unpacked, BufferSize, packed, res);
	if(res2 < 0) {
		printf("ERROR\n");
		goto done;
	}
	printf("!!!%d -> %d\n", res, res2);
	/* The expanded data is not NUL-terminated, so write it by length. */
	fwrite(unpacked, 1, (size_t)res2, stdout);
	putchar('\n');
	status = 0;
done:
	if(fp)
		fclose(fp);
	free(unpacked);
	free(packed);
	free(plain);
	return status;
}