(lz) example project
This commit is contained in:
parent
7528540caf
commit
6080a8f68d
|
@ -0,0 +1,22 @@
|
|||
AlignAfterOpenBracket: DontAlign
|
||||
AlignEscapedNewlines: DontAlign
|
||||
AlignOperands: DontAlign
|
||||
AllowShortBlocksOnASingleLine: Always
|
||||
AllowShortCaseLabelsOnASingleLine: true
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AllowShortLoopsOnASingleLine: true
|
||||
AlwaysBreakAfterDefinitionReturnType: TopLevel
|
||||
BreakBeforeTernaryOperators: false
|
||||
BinPackArguments: false
|
||||
BinPackParameters: false
|
||||
BreakBeforeBraces: WebKit
|
||||
IndentCaseLabels: false
|
||||
TabWidth: 4
|
||||
IndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
UseTab: ForContinuationAndIndentation
|
||||
ColumnLimit: 0
|
||||
ReflowComments: false
|
||||
SortIncludes: false
|
||||
SpaceBeforeParens: false
|
|
@ -0,0 +1,86 @@
|
|||
Yo, listen up here's a story
|
||||
About a little guy
|
||||
That lives in a blue world
|
||||
And all day and all night
|
||||
And everything he sees is just blue
|
||||
Like him inside and outside
|
||||
Blue his house
|
||||
With a blue little window
|
||||
And a blue corvette
|
||||
And everything is blue for him
|
||||
And himself and everybody around
|
||||
Cause he ain't got nobody to listen to
|
||||
I'm blue
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
I'm blue
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
I have a blue house
|
||||
With a blue window
|
||||
Blue is the colour of all that I wear
|
||||
Blue are the streets
|
||||
And all the trees are too
|
||||
I have a girlfriend and she is so blue
|
||||
Blue are the people here
|
||||
That walk around
|
||||
Blue like my corvette its in and outside
|
||||
Blue are the words I say
|
||||
And what I think
|
||||
Blue are the feelings
|
||||
That live inside me
|
||||
I'm blue
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
I'm blue
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
I have a blue house
|
||||
With a blue window
|
||||
Blue is the colour of all that I wear
|
||||
Blue are the streets
|
||||
And all the trees are too
|
||||
I have a girlfriend and she is so blue
|
||||
Blue are the people here
|
||||
That walk around
|
||||
Blue like my corvette, its in and outside
|
||||
Blue are the words I say
|
||||
And what I think
|
||||
Blue are the feelings
|
||||
That live inside me
|
||||
I'm blue
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
I'm blue
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
||||
Da ba dee da ba di
|
|
@ -0,0 +1,56 @@
|
|||
Simple Uxn LZ Format
|
||||
====================
|
||||
|
||||
Goals:
|
||||
|
||||
* Anyone can implement it
|
||||
* Small source code size
|
||||
* Easy to implement from Uxn
|
||||
* Mildly better than RLE
|
||||
|
||||
Non-goals:
|
||||
|
||||
* High compression ratio
|
||||
* High compression speed
|
||||
|
||||
Format
|
||||
------
|
||||
|
||||
It's a stream of commands. The first byte encodes the first command. Read the commands from the input until there's no more input.
|
||||
|
||||
There are two commands: literal and dictionary.
|
||||
|
||||
|
||||
```
|
||||
Byte 1 Byte 2+n
|
||||
┌─────────────────┐ ┌─────
|
||||
Literal │ 0 x x x x x x x │ │ ....
|
||||
(Always 1 byte) └─────────────────┘ └─────
|
||||
Length of literal Bytes to copy to output
|
||||
(Adjust by adding 1)
|
||||
|
||||
|
||||
Byte 1 Byte 2
|
||||
Dictionary ┌─────────────────┐ ┌─────────────────┐
|
||||
(2 bytes version)│ 1 0 x x x x x x │ │ x x x x x x x x │
|
||||
└─────────────────┘ └─────────────────┘
|
||||
Length of Offset into
|
||||
dictionary match dictionary
|
||||
(Adjust by adding 4) (Adjust by adding 1)
|
||||
|
||||
|
||||
Byte 1 Byte 2 Byte 3
|
||||
Dictionary ┌─────────────────┬─────────────────┐ ┌─────────────────┐
|
||||
(3 bytes version)│ 1 1 x x x x x x │ x x x x x x x x │ │ x x x x x x x x │
|
||||
└─────────────────┴─────────────────┘ └─────────────────┘
|
||||
Length of dictionary match Offset into
|
||||
(Adjust by adding 4) dictionary
|
||||
(Adjust by adding 1)
|
||||
```
|
||||
|
||||
* The maximum dictionary history size is 256 bytes.
|
||||
* Dictionary offsets are measured backwards from the end of the last byte that was output.
|
||||
* Example: an offset of 0 means go back by 1 byte into the history.
|
||||
* `a b c d e f|g`
|
||||
* Example: an offset of 5 means go back by 6 bytes into the history.
|
||||
* `a|b c d e f g`
|
|
@ -0,0 +1 @@
|
|||
abracadabra
|
|
@ -0,0 +1,266 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "lz.h"
|
||||
|
||||
/* Shortest match that is encoded as a dictionary command. Stored match
 * lengths are biased by this amount (a stored 0 means a match of 4). */
enum { MinMatchLength = 4 };

/*
 * Compress input_size bytes from input into output.
 *
 * The output is a sequence of commands:
 *   - literal:    one control byte (run length - 1, at most 127) followed by
 *                 the raw bytes;
 *   - dictionary: one or two length bytes (top bit set; second-from-top bit
 *                 set when two bytes are used) followed by one offset byte
 *                 giving the distance back into the last 256 input bytes,
 *                 minus 1.
 *
 * Returns the number of bytes written to output, or -1 when output_size is
 * too small. On -1 the output buffer may have been partially written.
 */
int
uxn_lz_compress(void *output, int output_size, const void *input, int input_size)
{
	const unsigned char *const src_begin = input;
	const unsigned char *const src_end = src_begin + input_size;
	const unsigned char *src = src_begin;
	unsigned char *dst = output;
	/* Control byte of the literal run that can still be extended, if any */
	unsigned char *literal_ctl = 0;

	while(src != src_end) {
		const unsigned char *best = 0; /* Start of the best candidate found */
		int best_len = 0;              /* Length of the best candidate found */
		int window = (int)(src - src_begin);
		int remaining = (int)(src_end - src);
		int dist, n, biased;

		/* Only the most recent 256 bytes can be addressed by an offset byte */
		if(window > 256)
			window = 256;
		/* A match length must fit in 14 bits after the bias is removed */
		if(remaining > 0x3FFF + MinMatchLength)
			remaining = 0x3FFF + MinMatchLength;

		/* Try every history position, farthest first. Ties keep the
		 * earlier (farther) candidate because the comparison is strict. */
		for(dist = window; dist > 0; dist--) {
			const unsigned char *cand = src - dist;
			/* Indexing modulo dist models the decoder re-reading bytes it
			 * has just produced when a match overlaps the write position. */
			for(n = 0; n < remaining && src[n] == cand[n % dist]; n++)
				;
			if(n == remaining) {
				/* Covers everything that is left: cannot be beaten */
				best_len = n;
				best = cand;
				break;
			}
			if(n > best_len) {
				best_len = n;
				best = cand;
			}
		}

		if(best_len >= MinMatchLength) {
			/* Dictionary command: length byte(s) plus one offset byte */
			if((output_size -= 2) < 0)
				return -1;
			biased = best_len - MinMatchLength;
			if(biased > 0x3F) {
				/* Needs the two-byte length form: both flag bits set */
				if((output_size -= 1) < 0)
					return -1;
				*dst++ = (biased >> 8) | 0x40 | 0x80;
				*dst++ = biased;
			} else {
				/* One-byte length form: only the "dictionary" flag set */
				*dst++ = biased | 0x80;
			}
			*dst++ = src - best - 1; /* Distance back, stored minus 1 */
			src += best_len;
			literal_ctl = 0; /* A following literal must start a new run */
			continue;
		}

		if(literal_ctl) {
			/* Extend the open literal run by one byte */
			if((output_size -= 1) < 0)
				return -1;
			if(++*literal_ctl == 127)
				literal_ctl = 0; /* Run is full; the next literal starts fresh */
		} else {
			/* Open a new literal run: control byte plus the first data byte */
			if((output_size -= 2) < 0)
				return -1;
			literal_ctl = dst++;
			*literal_ctl = 0; /* 0 encodes a run of length 1 */
		}
		*dst++ = *src++;
	}
	return (int)(dst - (unsigned char *)output);
}
|
||||
|
||||
int
|
||||
uxn_lz_expand(void *output, int output_size, const void *input, int input_size)
|
||||
{
|
||||
int num, offset, written = 0;
|
||||
unsigned char *out = output;
|
||||
const unsigned char *from, *in = input;
|
||||
while(input_size) {
|
||||
num = *in++;
|
||||
if(num > 127) /* Dictionary */
|
||||
{
|
||||
if((input_size -= 1) < 0)
|
||||
goto malformed;
|
||||
num &= 0x7F;
|
||||
if(num & 0x40) {
|
||||
if((input_size -= 1) < 0)
|
||||
goto malformed;
|
||||
num = *in++ | num << 8 & 0x3FFF;
|
||||
}
|
||||
num += MinMatchLength;
|
||||
offset = *in++ + 1;
|
||||
if(offset > written)
|
||||
goto malformed;
|
||||
from = out + written - offset;
|
||||
} else /* Literal */
|
||||
{
|
||||
input_size -= ++num;
|
||||
if(input_size < 0)
|
||||
goto malformed;
|
||||
from = in, in += num;
|
||||
}
|
||||
if(written + num > output_size)
|
||||
goto overflow;
|
||||
while(num--)
|
||||
out[written++] = *from++;
|
||||
}
|
||||
return written;
|
||||
overflow:
|
||||
malformed:
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
uxn_lz_expand_stream(struct uxn_lz_expand_t *a)
|
||||
{
|
||||
/* Copy struct to stack variables for compiler optimizations */
|
||||
unsigned char *next_in = a->next_in, *next_out = a->next_out;
|
||||
int avail_in = a->avail_in, avail_out = a->avail_out;
|
||||
int dict_len = a->dict_len, copy_num = a->copy_num;
|
||||
unsigned char dict_read_pos = a->dict_read_pos,
|
||||
dict_write_pos = a->dict_write_pos, *dict = a->dict;
|
||||
int result = 0;
|
||||
switch(a->state) {
|
||||
case 0:
|
||||
for(; avail_in;) {
|
||||
copy_num = *next_in++;
|
||||
avail_in--;
|
||||
if(copy_num > 127) /* Dictionary */
|
||||
{
|
||||
copy_num &= 0x7F;
|
||||
if(copy_num & 0x40) {
|
||||
case 1:
|
||||
if(!avail_in) {
|
||||
a->state = 1;
|
||||
goto need_more;
|
||||
}
|
||||
avail_in--;
|
||||
copy_num = *next_in++ | copy_num << 8 & 0x3FFF;
|
||||
}
|
||||
copy_num += MinMatchLength;
|
||||
case 2:
|
||||
if(!avail_in) {
|
||||
a->state = 2;
|
||||
goto need_more;
|
||||
}
|
||||
avail_in--;
|
||||
dict_read_pos = *next_in++ + 1;
|
||||
if(dict_read_pos > dict_len) {
|
||||
a->state = 5;
|
||||
result = -1;
|
||||
goto flush;
|
||||
} /* Malformed */
|
||||
dict_read_pos = dict_write_pos - dict_read_pos;
|
||||
if((dict_len += copy_num) > 256)
|
||||
dict_len = 256;
|
||||
case 3:
|
||||
do {
|
||||
if(!avail_out) {
|
||||
a->state = 3;
|
||||
goto need_more;
|
||||
}
|
||||
*next_out++ = dict[dict_write_pos++] = dict[dict_read_pos++];
|
||||
avail_out--;
|
||||
} while(--copy_num);
|
||||
} else /* Literal */
|
||||
{
|
||||
copy_num++;
|
||||
if((dict_len += copy_num) > 256)
|
||||
dict_len = 256;
|
||||
case 4:
|
||||
do {
|
||||
if(!avail_in || !avail_out) {
|
||||
a->state = 4;
|
||||
goto need_more;
|
||||
}
|
||||
*next_out++ = dict[dict_write_pos++] = *next_in++;
|
||||
avail_in--, avail_out--;
|
||||
} while(--copy_num);
|
||||
}
|
||||
}
|
||||
a->state = 0;
|
||||
case 5:;
|
||||
}
|
||||
need_more:
|
||||
flush:
|
||||
/* Flush stack variables back to struct */
|
||||
a->next_in = next_in, a->next_out = next_out;
|
||||
a->avail_in = avail_in, a->avail_out = avail_out;
|
||||
a->dict_len = dict_len, a->copy_num = copy_num;
|
||||
a->dict_read_pos = dict_read_pos, a->dict_write_pos = dict_write_pos;
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Fold bytes_size bytes at bytes into a running checksum.
 *
 * seed carries the state from a previous call: its upper 16 bits feed one
 * accumulator and its full value feeds the other. Each input byte is
 * duplicated into both halves of a 16-bit word and added to both
 * accumulators after they are multiplied by their own constant.
 *
 * Returns the first accumulator shifted into the upper half, combined with
 * the low 16 bits of the second.
 */
unsigned int
uxn_checksum(unsigned int seed, void *bytes, unsigned int bytes_size)
{
	unsigned char *data = bytes;
	unsigned int hi = seed >> 16;
	unsigned int lo = seed;
	unsigned int word;
	unsigned int idx;

	for(idx = 0; idx < bytes_size; idx++) {
		word = data[idx] << 8 | data[idx]; /* Same byte in both halves */
		hi = hi * 0x2443 + word;
		lo = lo * 0x118d + word;
	}
	return hi << 16 | (lo & 0xFFFF);
}
|
||||
|
||||
/*
 * Example driver: reads up to 1000000 bytes from "example.txt", prints each
 * byte in hex, compresses the data, expands the compressed result again and
 * prints the sizes of each step followed by the expanded bytes.
 *
 * Returns 0 on success and 1 if the file cannot be opened, memory cannot
 * be allocated, or compression/expansion fails.
 */
int
main(int argc, char *argv[])
{
	enum { BufferSize = 1000000 };
	int status = 1;
	int packed_len, unpacked_len;
	size_t input_len = 0;
	/* All buffers live on the heap: three 1 MB arrays are too large to
	 * place on the stack portably. */
	unsigned char *raw = NULL, *packed = NULL, *unpacked = NULL;
	FILE *fp;

	(void)argc;
	(void)argv;

	fp = fopen("example.txt", "rb");
	if(!fp) {
		perror("example.txt");
		return 1;
	}

	raw = malloc(BufferSize);
	packed = malloc(BufferSize);
	unpacked = malloc(BufferSize);
	if(!raw || !packed || !unpacked) {
		fprintf(stderr, "out of memory\n");
		goto cleanup;
	}

	for(input_len = 0; input_len < BufferSize; ++input_len) {
		int c = getc(fp);
		if(c == EOF)
			break;
		raw[input_len] = (unsigned char)c;
		printf("%02x\n", c);
	}

	packed_len = uxn_lz_compress(packed, BufferSize, raw, (int)input_len);
	if(packed_len < 0) {
		printf("ERROR\n");
		goto cleanup;
	}
	printf("!!!%zu -> %d\n", input_len, packed_len);

	// Other way: expand the compressed bytes, not the original input

	unpacked_len = uxn_lz_expand(unpacked, BufferSize, packed, packed_len);
	printf("!!!%d -> %d\n", packed_len, unpacked_len);
	if(unpacked_len < 0) {
		printf("ERROR\n");
		goto cleanup;
	}

	/* The expanded data is not NUL-terminated, so write it by length */
	fwrite(unpacked, 1, (size_t)unpacked_len, stdout);
	putchar('\n');
	status = 0;

cleanup:
	free(unpacked);
	free(packed);
	free(raw);
	fclose(fp);
	return status;
}
|
Loading…
Reference in New Issue