From 6b3fe5706708308f105dc8f1771b0678929424c9 Mon Sep 17 00:00:00 2001 From: neauoire Date: Sat, 18 Nov 2023 11:16:27 -0800 Subject: [PATCH] (lz) Starting uxntal encoder --- cli/lz/build.sh | 17 +++++++++- cli/lz/ulzenc.c | 57 +++++++++++++++++--------------- cli/lz/ulzenc.tal | 82 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+), 27 deletions(-) create mode 100644 cli/lz/ulzenc.tal diff --git a/cli/lz/build.sh b/cli/lz/build.sh index c9fc8c1..c591569 100755 --- a/cli/lz/build.sh +++ b/cli/lz/build.sh @@ -3,7 +3,6 @@ RELEASE_flags="-DNDEBUG -O2 -g0 -s" DEBUG_flags="-std=c89 -D_POSIX_C_SOURCE=199309L -DDEBUG -Wall -Wno-unknown-pragmas -Wpedantic -Wshadow -Wextra -Werror=implicit-int -Werror=incompatible-pointer-types -Werror=int-conversion -Wvla -g -Og -fsanitize=address -fsanitize=undefined" - LIN="uxncli $HOME/roms/uxnlin.rom" ASM="uxncli $HOME/roms/drifblim.rom" @@ -12,6 +11,7 @@ then clang-format -i ulzdec.c clang-format -i ulzenc.c $LIN decoder.tal + $LIN encoder.tal fi # Building @@ -19,6 +19,15 @@ fi cc ulzdec.c $DEBUG_flags -o ulzdec cc ulzenc.c $DEBUG_flags -o ulzenc $ASM ulzdec.tal ulzdec.rom +$ASM ulzenc.tal ulzenc.rom + +if [[ "$*" == *"--install"* ]] +then + cp ulzenc ~/bin + cp ulzdec ~/bin + cp ulzdec.rom ~/roms + cp ulzenc.rom ~/roms +fi # Make compressed file @@ -36,6 +45,12 @@ uxncli ulzdec.rom a.ulz b.bin && cat b.bin echo "" echo "" +echo "================================" + +# ./ulzenc example.txt a.ulz +uxncli ulzenc.rom example.txt a.ulz +uxncli ~/roms/hx.rom a.ulz + rm ./ulzdec rm ./ulzenc rm ./a.ulz diff --git a/cli/lz/ulzenc.c b/cli/lz/ulzenc.c index bde1baf..f31e1a3 100644 --- a/cli/lz/ulzenc.c +++ b/cli/lz/ulzenc.c @@ -12,31 +12,30 @@ error(const char *name, const char *msg) enum { MinMatchLength = 4 }; -char *raw, *mem; +unsigned char *raw, *mem; int -uxn_lz_compress(void *output, const void *input, int input_size) +uxn_lz_compress(const void *input, int input_size) { int i, dict_len, match_len, string_len, match_ctl; - unsigned char *out = output, *combine = 0; - const unsigned char *dict, *dict_best = 0, *in = input, *start = in, *end = in + input_size; + unsigned char *output_ptr = mem, *combine = 0; + const unsigned char *dict, *dict_best = 0; + const unsigned char *in = input, *start = in, *end = in + input_size; while(in != end) { + /* Get available dictionary size (history of original output) */ dict_len = (int)(in - start); - /* Limit history lookback to 256 bytes */ if(dict_len > 256) dict_len = 256; - /* Start of dictionary */ - dict = in - dict_len; + /* Size of the string to search for */ + /* Limit string length to what we can fit in 14 bits, plus the minimum match length */ string_len = (int)(end - in); if(string_len > 0x3FFF + MinMatchLength) string_len = 0x3FFF + MinMatchLength; - /* ^ Limit string length to what we can fit in 14 bits, plus the minimum match length */ - /* This will hold the length of our best match */ - match_len = 0; + /* Iterate through the dictionary */ - for(; dict_len; dict += 1, dict_len -= 1) { + for(dict = in - dict_len, match_len = 0; dict_len; dict++, dict_len -= 1) { /* Find common prefix length with the string */ for(i = 0;; i++) { if(i == string_len) { @@ -54,24 +53,30 @@ uxn_lz_compress(void *output, const void *input, int input_size) dict_best = dict; } } + done_search: - /* Long enough? Use dictionary match */ + + /* CPY */ + if(match_len >= MinMatchLength) { /* More numeric range: treat 0 as 4, 1 as 5, etc. */ match_ctl = match_len - MinMatchLength; - /* Match is long enough to use 2 bytes for the size */ + /* CPY2 */ if(match_ctl > 0x3F) { - *out++ = match_ctl >> 8 | 0x40 | 0x80; /* High byte of the size, with both flags set */ - *out++ = match_ctl; /* Low byte of the size */ - } else { - /* Set the "dictionary" flag */ - *out++ = match_ctl | 0x80; - } - *out++ = in - dict_best - 1; /* Write offset into history. (0 is -1, 1 is -2, ...) */ - in += match_len; /* Advance input by size of the match */ - combine = 0; /* Disable combining previous literal, if any */ + *output_ptr++ = match_ctl >> 8 | 0xc0; /* High byte of the length, with both flags set */ + *output_ptr++ = match_ctl; /* Low byte of the length */ + /* CPY */ + } else + *output_ptr++ = match_ctl | 0x80; + /* offset(0 is -1, 1 is -2, ...) */ + *output_ptr++ = in - dict_best - 1; + in += match_len; /* Advance input by size of the match */ + combine = 0; /* Disable combining previous literal, if any */ continue; } + + /* LIT */ + /* Combine with previous literal */ if(combine) { if(++*combine == 127) @@ -80,14 +85,14 @@ uxn_lz_compress(void *output, const void *input, int input_size) /* Start a new literal */ else { /* Store this address, and later use it to increment the literal size. */ - combine = out++; + combine = output_ptr++; /* The 0 here means literal of length 1. */ *combine = 0; } /* Write 1 literal byte from the input to the output. */ - *out++ = *in++; + *output_ptr++ = *in++; } - return (int)(out - (unsigned char *)output); + return (int)(output_ptr - mem); } int @@ -96,7 +101,7 @@ encode_ulz(FILE *src) int length = 0; mem = malloc(0x10000), raw = malloc(0x10000); while(fread(raw + length, 1, 1, src)) ++length; - return uxn_lz_compress(mem, raw, length); + return uxn_lz_compress(raw, length); } int diff --git a/cli/lz/ulzenc.tal b/cli/lz/ulzenc.tal new file mode 100644 index 0000000..58aed18 --- /dev/null +++ b/cli/lz/ulzenc.tal @@ -0,0 +1,82 @@ +( ulz encoder ) + +|10 @Console &vector $2 &read $1 &pad $5 &write $1 &err $1 +|a0 @File &vector $2 &success $2 &stat $2 &delete $1 &append $1 &name $2 &length $2 &read $2 &write $2 + +|0000 + + @src $30 + @dst $30 + @ptr $1 + +|0100 + +@ready-src ( -> ) + ;&await .Console/vector DEO2 + BRK + &await ( -> ) + .Console/read DEI .src skey ?ready-dst + BRK + +@ready-dst ( -> ) + ;&await .Console/vector DEO2 + BRK + &await ( -> ) + .Console/read DEI .dst skey ?on-ready + BRK + +@on-ready ( -> ) + ( | load raw ) + ;src .File/name DEO2 + #4000 .File/length DEO2 + ;raw .File/read DEO2 + ;raw .File/success DEI2 uxn_lz_compress + ( halt ) #800f DEO + BRK + +@uxn_lz_compress ( input* length* -- ) + ADD2k NIP2 SWP2 + &w ( -- ) + ( body ) + INC2 DUP2 phex #0a18 DEO + + NEQ2k ?&w + JMP2r + +( +@|stdlib ) + +@skey ( key buf -- proc ) + OVR #21 LTH ?&eval + #00 SWP sput #00 JMP2r + &eval POP2 #01 JMP2r + +@scap ( str* -- end* ) + &w INC2 & LDAk ?&w + JMP2r + +@sput ( chr str* -- ) + scap/ INC2k #00 ROT ROT STA + STA + JMP2r + +@getc ( -- b ) + #0001 .File/length DEO2 + ;&b .File/read DEO2 + [ LIT &b $1 ] JMP2r + +@phex ( short* -- ) + SWP phex/b + &b ( -- ) + DUP #04 SFT phex/c + &c ( -- ) + #0f AND DUP #09 GTH #27 MUL ADD [ LIT "0 ] ADD #18 DEO + JMP2r + +( +@|memory ) + +@raw $4000 + +@compressed $4000 +