Apply a minor speed optimization to the LZMA decoder.
This commit is contained in:
parent
f310c50286
commit
bfde3b24a5
|
@ -179,43 +179,41 @@ decode_dummy(const lzma_coder *restrict coder,
|
||||||
coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0));
|
coder->literal_coder, now_pos, lz_get_byte(coder->lz, 0));
|
||||||
uint32_t symbol = 1;
|
uint32_t symbol = 1;
|
||||||
|
|
||||||
if (!is_char_state(state)) {
|
if (is_char_state(state)) {
|
||||||
// Decode literal with match byte.
|
// Decode literal without match byte.
|
||||||
|
do {
|
||||||
|
if_bit_0(subcoder[symbol]) {
|
||||||
|
update_bit_0_dummy();
|
||||||
|
symbol <<= 1;
|
||||||
|
} else {
|
||||||
|
update_bit_1_dummy();
|
||||||
|
symbol = (symbol << 1) | 1;
|
||||||
|
}
|
||||||
|
} while (symbol < 0x100);
|
||||||
|
|
||||||
assert(rep0 != UINT32_MAX);
|
} else {
|
||||||
|
// Decode literal with match byte.
|
||||||
uint32_t match_byte = lz_get_byte(coder->lz, rep0);
|
uint32_t match_byte = lz_get_byte(coder->lz, rep0);
|
||||||
|
uint32_t subcoder_offset = 0x100;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
match_byte <<= 1;
|
match_byte <<= 1;
|
||||||
const uint32_t match_bit = match_byte & 0x100;
|
const uint32_t match_bit = match_byte & subcoder_offset;
|
||||||
const uint32_t subcoder_index = 0x100 + match_bit + symbol;
|
const uint32_t subcoder_index
|
||||||
|
= subcoder_offset + match_bit + symbol;
|
||||||
|
|
||||||
if_bit_0(subcoder[subcoder_index]) {
|
if_bit_0(subcoder[subcoder_index]) {
|
||||||
update_bit_0_dummy();
|
update_bit_0_dummy();
|
||||||
symbol <<= 1;
|
symbol <<= 1;
|
||||||
if (match_bit != 0)
|
subcoder_offset &= ~match_bit;
|
||||||
break;
|
|
||||||
} else {
|
} else {
|
||||||
update_bit_1_dummy();
|
update_bit_1_dummy();
|
||||||
symbol = (symbol << 1) | 1;
|
symbol = (symbol << 1) | 1;
|
||||||
if (match_bit == 0)
|
subcoder_offset &= match_bit;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
} while (symbol < 0x100);
|
} while (symbol < 0x100);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode literal without match byte. This is also
|
|
||||||
// the tail of the with-match-byte function.
|
|
||||||
while (symbol < 0x100) {
|
|
||||||
if_bit_0(subcoder[symbol]) {
|
|
||||||
update_bit_0_dummy();
|
|
||||||
symbol <<= 1;
|
|
||||||
} else {
|
|
||||||
update_bit_1_dummy();
|
|
||||||
symbol = (symbol << 1) | 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -366,43 +364,46 @@ decode_real(lzma_coder *restrict coder, const uint8_t *restrict in,
|
||||||
now_pos, lz_get_byte(coder->lz, 0));
|
now_pos, lz_get_byte(coder->lz, 0));
|
||||||
uint32_t symbol = 1;
|
uint32_t symbol = 1;
|
||||||
|
|
||||||
if (!is_char_state(state)) {
|
if (is_char_state(state)) {
|
||||||
// Decode literal with match byte.
|
// Decode literal without match byte.
|
||||||
|
do {
|
||||||
|
if_bit_0(subcoder[symbol]) {
|
||||||
|
update_bit_0(subcoder[symbol]);
|
||||||
|
symbol <<= 1;
|
||||||
|
} else {
|
||||||
|
update_bit_1(subcoder[symbol]);
|
||||||
|
symbol = (symbol << 1) | 1;
|
||||||
|
}
|
||||||
|
} while (symbol < 0x100);
|
||||||
|
|
||||||
assert(rep0 != UINT32_MAX);
|
} else {
|
||||||
|
// Decode literal with match byte.
|
||||||
|
//
|
||||||
|
// The usage of subcoder_offset allows omitting some
|
||||||
|
// branches, which should give a tiny speed improvement on
|
||||||
|
// some CPUs. subcoder_offset gets set to zero once a decoded
|
||||||
|
// bit differs from the corresponding bit of the match byte.
|
||||||
uint32_t match_byte = lz_get_byte(coder->lz, rep0);
|
uint32_t match_byte = lz_get_byte(coder->lz, rep0);
|
||||||
|
uint32_t subcoder_offset = 0x100;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
match_byte <<= 1;
|
match_byte <<= 1;
|
||||||
const uint32_t match_bit = match_byte & 0x100;
|
const uint32_t match_bit = match_byte & subcoder_offset;
|
||||||
const uint32_t subcoder_index = 0x100 + match_bit + symbol;
|
const uint32_t subcoder_index
|
||||||
|
= subcoder_offset + match_bit + symbol;
|
||||||
|
|
||||||
if_bit_0(subcoder[subcoder_index]) {
|
if_bit_0(subcoder[subcoder_index]) {
|
||||||
update_bit_0(subcoder[subcoder_index]);
|
update_bit_0(subcoder[subcoder_index]);
|
||||||
symbol <<= 1;
|
symbol <<= 1;
|
||||||
if (match_bit != 0)
|
subcoder_offset &= ~match_bit;
|
||||||
break;
|
|
||||||
} else {
|
} else {
|
||||||
update_bit_1(subcoder[subcoder_index]);
|
update_bit_1(subcoder[subcoder_index]);
|
||||||
symbol = (symbol << 1) | 1;
|
symbol = (symbol << 1) | 1;
|
||||||
if (match_bit == 0)
|
subcoder_offset &= match_bit;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
} while (symbol < 0x100);
|
} while (symbol < 0x100);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode literal without match byte. This is also
|
|
||||||
// the tail of the with-match-byte function.
|
|
||||||
while (symbol < 0x100) {
|
|
||||||
if_bit_0(subcoder[symbol]) {
|
|
||||||
update_bit_0(subcoder[symbol]);
|
|
||||||
symbol <<= 1;
|
|
||||||
} else {
|
|
||||||
update_bit_1(subcoder[symbol]);
|
|
||||||
symbol = (symbol << 1) | 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Put the decoded byte to the dictionary, update the
|
// Put the decoded byte to the dictionary, update the
|
||||||
// decoder state, and start a new decoding loop.
|
// decoder state, and start a new decoding loop.
|
||||||
coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);
|
coder->lz.dict[coder->lz.pos++] = (uint8_t)(symbol);
|
||||||
|
|
Loading…
Reference in New Issue