(lz) Starting uxntal encoder

This commit is contained in:
neauoire 2023-11-18 11:16:27 -08:00
parent 88904c2ef1
commit 6b3fe57067
3 changed files with 129 additions and 27 deletions

View File

@ -3,7 +3,6 @@
RELEASE_flags="-DNDEBUG -O2 -g0 -s"
DEBUG_flags="-std=c89 -D_POSIX_C_SOURCE=199309L -DDEBUG -Wall -Wno-unknown-pragmas -Wpedantic -Wshadow -Wextra -Werror=implicit-int -Werror=incompatible-pointer-types -Werror=int-conversion -Wvla -g -Og -fsanitize=address -fsanitize=undefined"
LIN="uxncli $HOME/roms/uxnlin.rom"
ASM="uxncli $HOME/roms/drifblim.rom"
@ -12,6 +11,7 @@ then
clang-format -i ulzdec.c
clang-format -i ulzenc.c
$LIN decoder.tal
$LIN encoder.tal
fi
# Building
@ -19,6 +19,15 @@ fi
cc ulzdec.c $DEBUG_flags -o ulzdec
cc ulzenc.c $DEBUG_flags -o ulzenc
$ASM ulzdec.tal ulzdec.rom
$ASM ulzenc.tal ulzenc.rom
if [[ "$*" == *"--install"* ]]
then
cp ulzenc ~/bin
cp ulzdec ~/bin
cp ulzdec.rom ~/roms
cp ulzenc.rom ~/roms
fi
# Make compressed file
@ -36,6 +45,12 @@ uxncli ulzdec.rom a.ulz b.bin && cat b.bin
echo ""
echo ""
echo "================================"
# ./ulzenc example.txt a.ulz
uxncli ulzenc.rom example.txt a.ulz
uxncli ~/roms/hx.rom a.ulz
rm ./ulzdec
rm ./ulzenc
rm ./a.ulz

View File

@ -12,31 +12,30 @@ error(const char *name, const char *msg)
enum { MinMatchLength = 4 };
char *raw, *mem;
unsigned char *raw, *mem;
int
uxn_lz_compress(void *output, const void *input, int input_size)
uxn_lz_compress(const void *input, int input_size)
{
int i, dict_len, match_len, string_len, match_ctl;
unsigned char *out = output, *combine = 0;
const unsigned char *dict, *dict_best = 0, *in = input, *start = in, *end = in + input_size;
unsigned char *output_ptr = mem, *combine = 0;
const unsigned char *dict, *dict_best = 0;
const unsigned char *in = input, *start = in, *end = in + input_size;
while(in != end) {
/* Get available dictionary size (history of original output) */
dict_len = (int)(in - start);
/* Limit history lookback to 256 bytes */
if(dict_len > 256)
dict_len = 256;
/* Start of dictionary */
dict = in - dict_len;
/* Size of the string to search for */
/* Limit string length to what we can fit in 14 bits, plus the minimum match length */
string_len = (int)(end - in);
if(string_len > 0x3FFF + MinMatchLength)
string_len = 0x3FFF + MinMatchLength;
/* ^ Limit string length to what we can fit in 14 bits, plus the minimum match length */
/* This will hold the length of our best match */
match_len = 0;
/* Iterate through the dictionary */
for(; dict_len; dict += 1, dict_len -= 1) {
for(dict = in - dict_len, match_len = 0; dict_len; dict++, dict_len -= 1) {
/* Find common prefix length with the string */
for(i = 0;; i++) {
if(i == string_len) {
@ -54,24 +53,30 @@ uxn_lz_compress(void *output, const void *input, int input_size)
dict_best = dict;
}
}
done_search:
/* Long enough? Use dictionary match */
/* CPY */
if(match_len >= MinMatchLength) {
/* More numeric range: treat 0 as 4, 1 as 5, etc. */
match_ctl = match_len - MinMatchLength;
/* Match is long enough to use 2 bytes for the size */
/* CPY2 */
if(match_ctl > 0x3F) {
*out++ = match_ctl >> 8 | 0x40 | 0x80; /* High byte of the size, with both flags set */
*out++ = match_ctl; /* Low byte of the size */
} else {
/* Set the "dictionary" flag */
*out++ = match_ctl | 0x80;
}
*out++ = in - dict_best - 1; /* Write offset into history. (0 is -1, 1 is -2, ...) */
*output_ptr++ = match_ctl >> 8 | 0xc0; /* High byte of the length, with both flags set */
*output_ptr++ = match_ctl; /* Low byte of the length */
/* CPY */
} else
*output_ptr++ = match_ctl | 0x80;
/* offset(0 is -1, 1 is -2, ...) */
*output_ptr++ = in - dict_best - 1;
in += match_len; /* Advance input by size of the match */
combine = 0; /* Disable combining previous literal, if any */
continue;
}
/* LIT */
/* Combine with previous literal */
if(combine) {
if(++*combine == 127)
@ -80,14 +85,14 @@ uxn_lz_compress(void *output, const void *input, int input_size)
/* Start a new literal */
else {
/* Store this address, and later use it to increment the literal size. */
combine = out++;
combine = output_ptr++;
/* The 0 here means literal of length 1. */
*combine = 0;
}
/* Write 1 literal byte from the input to the output. */
*out++ = *in++;
*output_ptr++ = *in++;
}
return (int)(out - (unsigned char *)output);
return (int)(output_ptr - mem);
}
int
@ -96,7 +101,7 @@ encode_ulz(FILE *src)
int length = 0;
mem = malloc(0x10000), raw = malloc(0x10000);
while(fread(raw + length, 1, 1, src)) ++length;
return uxn_lz_compress(mem, raw, length);
return uxn_lz_compress(raw, length);
}
int

82
cli/lz/ulzenc.tal Normal file
View File

@ -0,0 +1,82 @@
( ulz encoder )
( Console device port labels, laid out from port 10.
  With the padding below, read lands on port 12, write on port 18
  and err on port 19. )
|10 @Console &vector $2 &read $1 &pad $5 &write $1 &err $1
( File device port labels, laid out from port a0.
  success lands on a2, name on a8, length on aa, read on ac and write on ae. )
|a0 @File &vector $2 &success $2 &stat $2 &delete $1 &append $1 &name $2 &length $2 &read $2 &write $2
( Zero-page variables, starting at address 0000. )
|0000
( Buffer of 30 hex bytes, filled one key at a time by ready-src/await via skey;
  its address is later written to File/name by on-ready. )
@src $30
( Buffer of 30 hex bytes, filled one key at a time by ready-dst/await via skey.
  NOTE review: nothing in the visible code reads it back yet. )
@dst $30
( Single byte. NOTE review: not referenced by the code visible here. )
@ptr $1
( Program code starts at address 0100. )
|0100
( First stage: installs the await handler below as the Console vector,
  then stops evaluation until a console byte arrives. )
@ready-src ( -> )
;&await .Console/vector DEO2
BRK
( Console handler for the first argument: passes each incoming byte and the
  zero-page address of src to skey. skey appends the byte and returns 00,
  or returns 01 for a byte below 21, in which case control moves to ready-dst. )
&await ( -> )
.Console/read DEI .src skey ?ready-dst
BRK
( Second stage: replaces the Console vector with the await handler below,
  so that following console bytes are collected into dst instead of src. )
@ready-dst ( -> )
;&await .Console/vector DEO2
BRK
( Console handler for the second argument: same scheme as ready-src/await but
  targeting the dst buffer; a byte below 21 ends the name and jumps to on-ready. )
&await ( -> )
.Console/read DEI .dst skey ?on-ready
BRK
( Runs once both names have been received: loads the source file into raw,
  hands the buffer to uxn_lz_compress and then halts.
  NOTE review: dst and the compressed buffer are not used here yet, so no
  output file is written by the visible code. )
@on-ready ( -> )
( | load raw )
( Point the File device at the name stored in src. )
;src .File/name DEO2
( Request at most 4000 hex bytes, which is the size reserved for raw. )
#4000 .File/length DEO2
( Writing the destination address to File/read starts the read into raw. )
;raw .File/read DEO2
( Pass the buffer address and the value of File/success as the length.
  NOTE review: presumably File/success holds the number of bytes read,
  confirm against the File device specification. )
;raw .File/success DEI2 uxn_lz_compress
( Writes 80 to port 0f. NOTE review: port 0f is not labelled in this file,
  presumably the System state port used to end the program. )
( halt ) #800f DEO
BRK
( Placeholder for the encoder: walks the input range and prints every
  address after the first one in hexadecimal, one per line, through phex
  and the Console write port. No compression is performed yet.
  input* is the start address of the data, length* its size in bytes.
  Both arguments are consumed and nothing is returned. )
@uxn_lz_compress ( input* length* -- )
	( Turn input and length into end and cursor, cursor on top. )
	ADD2k NIP2 SWP2
	( An empty input must not enter the loop: the loop increments before it
	  compares, so it would otherwise run through the whole address space. )
	EQU2k ?&end
	&w ( -- )
		( body )
		INC2 DUP2 phex #0a18 DEO
		NEQ2k ?&w
	&end ( -- )
	( Drop cursor and end so the declared stack effect holds for callers. )
	POP2 POP2
	JMP2r
(
@|stdlib )
( Handles one received key for a zero-page string buffer.
  key is the received byte, buf the zero-page address of the buffer.
  A key below 21 is not stored and 01 is returned.
  Any other key is appended to the buffer through sput and 00 is returned. )
@skey ( key buf -- proc )
( Test a copy of key, leaving key and buf in place for either branch. )
OVR #21 LTH ?&eval
( Widen buf to a short address with a 00 high byte before calling sput. )
#00 SWP sput #00 JMP2r
( Terminator path: discard key and buf, report 01. )
&eval POP2 #01 JMP2r
( Advances a pointer to the next zero byte and returns that address.
  Entering at scap increments once before the first test, so the byte at
  str itself is skipped. Entering at the unnamed sub-label scap/ tests the
  byte at str first; sput uses that entry point. )
@scap ( str* -- end* )
( LDAk keeps the address and pushes the byte, looping while it is non-zero. )
&w INC2 & LDAk ?&w
JMP2r
( Appends chr to the zero-terminated string at str.
  Finds the current terminator with scap/, writes a new zero byte one
  position past it, then overwrites the old terminator with chr.
  No bounds check is made against the size of the destination buffer. )
@sput ( chr str* -- )
( After scap/ the stack holds chr and end. INC2k adds end plus one, and the
  two ROTs slide a 00 byte underneath that address ready for the first STA. )
scap/ INC2k #00 ROT ROT STA
( Remaining chr and end: store chr at the old end of the string. )
STA
JMP2r
( Reads a single byte through the File device and returns it.
  The byte is read directly into the operand slot of the LIT below, which
  then pushes it. Relies on File/name having been set beforehand.
  NOTE review: if the read stores nothing, the slot keeps its previous
  value and that value is returned; not called from the visible code. )
@getc ( -- b )
( Request exactly one byte. )
#0001 .File/length DEO2
( Target the literal operand labelled b as the read destination. )
;&b .File/read DEO2
[ LIT &b $1 ] JMP2r
( Prints a short as four lowercase hexadecimal digits on port 18,
  which is Console/write in this file.
  Built from fall-through stages: phex prints the high byte by calling
  phex/b, then falls into phex/b for the low byte; phex/b prints the high
  nibble by calling phex/c, then falls into phex/c for the low nibble. )
@phex ( short* -- )
( Bring the high byte to the top so it is printed first. )
SWP phex/b
&b ( -- )
( Print the upper four bits of a copy, keeping the byte for the lower four. )
DUP #04 SFT phex/c
&c ( -- )
( Mask to a nibble, add 27 when it is above 9 so that ten maps to the
  letter a, then offset from the character 0 and write it out. )
#0f AND DUP #09 GTH #27 MUL ADD [ LIT "0 ] ADD #18 DEO
JMP2r
(
@|memory )
( Input buffer of 4000 hex bytes; on-ready loads the source file here and
  requests the same number of bytes from the File device. )
@raw $4000
( Buffer of 4000 hex bytes reserved after raw.
  NOTE review: presumably meant for the encoder output; nothing in the
  visible code writes to it yet. )
@compressed $4000