Implemented LZMA_SYNC_FLUSH support in the Subblock encoder.

The API for handling Subfilters was changed to make it
consistent with LZMA_SYNC_FLUSH.

A few sanity checks were added for Subfilter handling. Some
small bugs were fixed. More comments were added.
This commit is contained in:
Lasse Collin 2008-01-19 21:16:33 +02:00
parent 23c227a864
commit e141fe1895
2 changed files with 214 additions and 78 deletions

View File

@ -95,9 +95,10 @@ typedef struct {
* input_offset % alignment == output_offset % alignment * input_offset % alignment == output_offset % alignment
* *
* The Subblock filter assumes that the first output byte will be * The Subblock filter assumes that the first output byte will be
* written to a position in the output stream that is properly aligned. * written to a position in the output stream that is properly
* * aligned. This requirement is automatically met when the start
* FIXME desc * offset of the Stream or Block is correctly told to Block or
* Stream encoder.
*/ */
uint32_t alignment; uint32_t alignment;
# define LZMA_SUBBLOCK_ALIGNMENT_MIN 1 # define LZMA_SUBBLOCK_ALIGNMENT_MIN 1
@ -161,16 +162,17 @@ typedef struct {
* *
* When subfilter_mode is LZMA_SUBFILTER_NONE, the application may * When subfilter_mode is LZMA_SUBFILTER_NONE, the application may
* put Subfilter options to subfilter_options structure, and then * put Subfilter options to subfilter_options structure, and then
* set subfilter_mode to LZMA_SUBFILTER_SET. This implies setting * set subfilter_mode to LZMA_SUBFILTER_SET. No new input data will
* flush to true. No new input data will be read until the Subfilter * be read until the Subfilter has been enabled. Once the Subfilter
* has been enabled. Once the Subfilter has been enabled, liblzma * has been enabled, liblzma will set subfilter_mode to
* will set subfilter_mode to LZMA_SUBFILTER_RUN. * LZMA_SUBFILTER_RUN.
* *
* When subfilter_mode is LZMA_SUBFILTER_RUN, the application may * When subfilter_mode is LZMA_SUBFILTER_RUN, the application may
* set subfilter_mode to LZMA_SUBFILTER_FINISH. No new input data * set subfilter_mode to LZMA_SUBFILTER_FINISH. All the input
* will be read until the Subfilter has been finished. Once the * currently available will be encoded before unsetting the
* Subfilter has been finished, liblzma will set subfilter_mode * Subfilter. Application must not change the amount of available
* to LZMA_SUBFILTER_NONE. * input until the Subfilter has finished. Once the Subfilter has
* finished, liblzma will set subfilter_mode to LZMA_SUBFILTER_NONE.
* *
* If the intent is to have Subfilter enabled to the very end of * If the intent is to have Subfilter enabled to the very end of
* the data, it is not needed to separately disable Subfilter with * the data, it is not needed to separately disable Subfilter with
@ -178,6 +180,11 @@ typedef struct {
* of lzma_code() will make the Subblock encoder disable the * of lzma_code() will make the Subblock encoder disable the
* Subfilter once all the data has been run through the Subfilter. * Subfilter once all the data has been run through the Subfilter.
* *
* After the first call with LZMA_SYNC_FLUSH or LZMA_FINISH, the
* application must not change subfilter_mode until LZMA_STREAM_END.
* Setting LZMA_SUBFILTER_SET/LZMA_SUBFILTER_FINISH and
* LZMA_SYNC_FLUSH/LZMA_FINISH _at the same time_ is fine.
*
* \note This variable is ignored if allow_subfilters is false. * \note This variable is ignored if allow_subfilters is false.
*/ */
lzma_subfilter_mode subfilter_mode; lzma_subfilter_mode subfilter_mode;

View File

@ -3,7 +3,7 @@
/// \file subblock_encoder.c /// \file subblock_encoder.c
/// \brief Encoder of the Subblock filter /// \brief Encoder of the Subblock filter
// //
// Copyright (C) 2007 Lasse Collin // Copyright (C) 2007, 2008 Lasse Collin
// //
// This library is free software; you can redistribute it and/or // This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public // modify it under the terms of the GNU Lesser General Public
@ -61,27 +61,57 @@ struct lzma_coder_s {
SEQ_SUBFILTER_FLAGS, SEQ_SUBFILTER_FLAGS,
} sequence; } sequence;
/// Pointer to the options given by the application. This is used
/// for two-way communication with the application.
lzma_options_subblock *options; lzma_options_subblock *options;
/// Position in various arrays.
size_t pos; size_t pos;
/// Holds subblock.size - 1 or rle.size - 1 when encoding size
/// of Data or Repeat Count.
uint32_t tmp; uint32_t tmp;
struct { struct {
/// This is a copy of options->alignment, or
/// LZMA_SUBBLOCK_ALIGNMENT_DEFAULT if options is NULL.
uint32_t multiple; uint32_t multiple;
/// Number of input bytes that we have already read but
/// not yet started writing out.
uint32_t in_pending; uint32_t in_pending;
/// Number of input bytes which we have processed and started
/// writing out. 32-bit integer is enough since we care only
/// about the lowest bits when fixing alignment.
uint32_t in_pos; uint32_t in_pos;
/// Number of bytes written out.
uint32_t out_pos; uint32_t out_pos;
} alignment; } alignment;
struct { struct {
/// Pointer to allocated buffer holding the Data field
/// of Subblock Type "Data".
uint8_t *data; uint8_t *data;
/// Number of bytes in the buffer.
size_t size; size_t size;
/// Allocated size of the buffer.
size_t limit; size_t limit;
} subblock; } subblock;
struct { struct {
/// Buffer to hold the data that may be coded with
/// Subblock Type `Repeating Data'.
uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX]; uint8_t buffer[LZMA_SUBBLOCK_RLE_MAX];
/// Number of bytes in buffer[].
size_t size; size_t size;
/// Number of times the first `size' bytes of buffer[]
/// will be repeated.
lzma_vli count; lzma_vli count;
} rle; } rle;
@ -90,15 +120,38 @@ struct lzma_coder_s {
SUB_NONE, SUB_NONE,
SUB_SET, SUB_SET,
SUB_RUN, SUB_RUN,
SUB_FLUSH,
SUB_FINISH, SUB_FINISH,
SUB_END_MARKER, SUB_END_MARKER,
} mode; } mode;
/// This is a copy of options->allow_subfilters. We use
/// this to verify that the application doesn't change
/// the value of allow_subfilters.
bool allow;
/// When this is true, application is not allowed to modify
/// options->subfilter_mode. We may still modify it here.
bool mode_locked;
/// True if we have encoded at least one byte of data with
/// the Subfilter.
bool got_input; bool got_input;
/// Track the amount of input available once
/// LZMA_SUBFILTER_FINISH has been enabled.
/// This is needed for sanity checking (kind
/// of duplicating what common/code.c does).
size_t in_avail;
/// Buffer for the Filter Flags field written after
/// the `Set Subfilter' indicator.
uint8_t *flags; uint8_t *flags;
/// Size of Filter Flags field.
uint32_t flags_size; uint32_t flags_size;
/// Pointers to Subfilter.
lzma_next_coder subcoder; lzma_next_coder subcoder;
} subfilter; } subfilter;
@ -234,8 +287,16 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
size_t in_size, uint8_t *restrict out, size_t in_size, uint8_t *restrict out,
size_t *restrict out_pos, size_t out_size, lzma_action action) size_t *restrict out_pos, size_t out_size, lzma_action action)
{ {
// Changing allow_subfilters is not allowed.
if (coder->options != NULL && coder->subfilter.allow
!= coder->options->allow_subfilters)
return LZMA_PROG_ERROR;
// Check if we need to do something special with the Subfilter. // Check if we need to do something special with the Subfilter.
if (coder->options != NULL && coder->options->allow_subfilters) { if (coder->subfilter.allow) {
assert(coder->options != NULL);
// See if subfilter_mode has been changed.
switch (coder->options->subfilter_mode) { switch (coder->options->subfilter_mode) {
case LZMA_SUBFILTER_NONE: case LZMA_SUBFILTER_NONE:
if (coder->subfilter.mode != SUB_NONE) if (coder->subfilter.mode != SUB_NONE)
@ -243,8 +304,9 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
break; break;
case LZMA_SUBFILTER_SET: case LZMA_SUBFILTER_SET:
if (coder->subfilter.mode != SUB_NONE) if (coder->subfilter.mode_locked
return LZMA_HEADER_ERROR; || coder->subfilter.mode != SUB_NONE)
return LZMA_PROG_ERROR;
coder->subfilter.mode = SUB_SET; coder->subfilter.mode = SUB_SET;
coder->subfilter.got_input = false; coder->subfilter.got_input = false;
@ -257,28 +319,47 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
case LZMA_SUBFILTER_RUN: case LZMA_SUBFILTER_RUN:
if (coder->subfilter.mode != SUB_RUN) if (coder->subfilter.mode != SUB_RUN)
return LZMA_PROG_ERROR; return LZMA_PROG_ERROR;
break; break;
case LZMA_SUBFILTER_FINISH: case LZMA_SUBFILTER_FINISH: {
if (coder->subfilter.mode == SUB_RUN) const size_t in_avail = in_size - *in_pos;
if (coder->subfilter.mode == SUB_RUN) {
if (coder->subfilter.mode_locked)
return LZMA_PROG_ERROR;
coder->subfilter.mode = SUB_FINISH; coder->subfilter.mode = SUB_FINISH;
else if (coder->subfilter.mode != SUB_FINISH) coder->subfilter.in_avail = in_avail;
return LZMA_PROG_ERROR;
if (!coder->subfilter.got_input) } else if (coder->subfilter.mode != SUB_FINISH
|| coder->subfilter.in_avail
!= in_avail) {
return LZMA_PROG_ERROR; return LZMA_PROG_ERROR;
}
break; break;
}
default: default:
return LZMA_HEADER_ERROR; return LZMA_HEADER_ERROR;
} }
// If we are sync-flushing or finishing, the application may
// no longer change subfilter_mode. Note that this check is
// done after checking the new subfilter_mode above; this
// way the application may e.g. set LZMA_SUBFILTER_SET and
// LZMA_SYNC_FLUSH at the same time, but it cannot modify
// subfilter_mode on the later lzma_code() calls before
// we have returned LZMA_STREAM_END.
if (action != LZMA_RUN)
coder->subfilter.mode_locked = true;
} }
// Main loop // Main loop
while (*out_pos < out_size) while (*out_pos < out_size)
switch (coder->sequence) { switch (coder->sequence) {
case SEQ_FILL: { case SEQ_FILL:
// Grab the new Subblock Data Size and reallocate the buffer. // Grab the new Subblock Data Size and reallocate the buffer.
if (coder->subblock.size == 0 && coder->options != NULL if (coder->subblock.size == 0 && coder->options != NULL
&& coder->options->subblock_data_size && coder->options->subblock_data_size
@ -297,44 +378,77 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
&coder->subblock.size, &coder->subblock.size,
coder->subblock.limit); coder->subblock.limit);
} else { // If we ran out of input before the whole buffer
const size_t in_start = *in_pos; // was filled, return to application.
lzma_ret ret; if (coder->subblock.size < coder->subblock.limit
&& action == LZMA_RUN)
return LZMA_OK;
if (coder->subfilter.mode == SUB_FINISH) { } else {
// Let the Subfilter write out pending data, assert(coder->options->subfilter_mode
// but don't give it any new input anymore. != LZMA_SUBFILTER_SET);
size_t dummy = 0;
ret = coder->subfilter.subcoder.code(coder // Using LZMA_FINISH automatically toggles
->subfilter.subcoder.coder, // LZMA_SUBFILTER_FINISH.
allocator, NULL, &dummy, 0, //
coder->subblock.data, // NOTE: It is possible that application had set
&coder->subblock.size, // LZMA_SUBFILTER_SET and LZMA_FINISH at the same
coder->subblock.limit, // time. In that case it is possible that we will
LZMA_FINISH); // cycle to LZMA_SUBFILTER_RUN, LZMA_SUBFILTER_FINISH,
} else { // and back to LZMA_SUBFILTER_NONE in a single
// Give our input data to the Subfilter. Note // Subblock encoder function call.
// that action can be LZMA_FINISH. In that if (action == LZMA_FINISH) {
// case, we filter everything until the end coder->options->subfilter_mode
// of the input. The application isn't required = LZMA_SUBFILTER_FINISH;
// to separately set LZMA_SUBBLOCK_FINISH. coder->subfilter.mode = SUB_FINISH;
ret = coder->subfilter.subcoder.code(coder
->subfilter.subcoder.coder,
allocator, in, in_pos, in_size,
coder->subblock.data,
&coder->subblock.size,
coder->subblock.limit,
action);
} }
const size_t in_used = *in_pos - in_start; const size_t in_start = *in_pos;
const lzma_ret ret = coder->subfilter.subcoder.code(
coder->subfilter.subcoder.coder,
allocator, in, in_pos, in_size,
coder->subblock.data,
&coder->subblock.size,
coder->subblock.limit,
coder->subfilter.mode == SUB_FINISH
? LZMA_FINISH : action);
const size_t in_used = *in_pos - in_start;
coder->alignment.in_pending += in_used;
if (in_used > 0) if (in_used > 0)
coder->subfilter.got_input = true; coder->subfilter.got_input = true;
coder->alignment.in_pending += in_used; coder->subfilter.in_avail = in_size - *in_pos;
if (ret == LZMA_STREAM_END) { if (ret == LZMA_STREAM_END) {
// All currently available input must have
// been processed.
assert(*in_pos == in_size);
// Flush now. Even if coder->subblock.size
// happened to be zero, we still need to go
// to SEQ_FLUSH to possibly finish RLE or
// write the Subfilter Unset indicator.
coder->sequence = SEQ_FLUSH;
if (coder->subfilter.mode == SUB_RUN) {
// Flushing with Subfilter enabled.
assert(action == LZMA_SYNC_FLUSH);
coder->subfilter.mode = SUB_FLUSH;
break;
}
// Subfilter finished its job.
assert(coder->subfilter.mode == SUB_FINISH
|| action == LZMA_FINISH);
// At least one byte of input must have been
// encoded with the Subfilter. This is
// required by the file format specification.
if (!coder->subfilter.got_input)
return LZMA_PROG_ERROR;
// We don't strictly need to do this, but // We don't strictly need to do this, but
// doing it sounds like a good idea, because // doing it sounds like a good idea, because
// otherwise the Subfilter's memory could be // otherwise the Subfilter's memory could be
@ -343,35 +457,30 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
lzma_next_coder_end(&coder->subfilter.subcoder, lzma_next_coder_end(&coder->subfilter.subcoder,
allocator); allocator);
assert(coder->options != NULL); // We need to flush the currently buffered
coder->options->subfilter_mode // data and write Unset Subfilter marker.
= LZMA_SUBFILTER_NONE; // Note that we cannot set
// coder->options->subfilter_mode to
assert(coder->subfilter.mode == SUB_FINISH // LZMA_SUBFILTER_NONE yet, because we
|| action == LZMA_FINISH); // haven't written the Unset Subfilter
// marker yet.
coder->subfilter.mode = SUB_END_MARKER; coder->subfilter.mode = SUB_END_MARKER;
// Flush now. Even if coder->subblock.size
// happens to be zero, we still need to go
// to SEQ_FLUSH to write the Subfilter Unset
// indicator.
coder->sequence = SEQ_FLUSH; coder->sequence = SEQ_FLUSH;
break; break;
} }
// Return if an error occurred. // Return if we couldn't fill the buffer or
if (ret != LZMA_OK) // if an error occurred.
if (coder->subblock.size < coder->subblock.limit
|| ret != LZMA_OK)
return ret; return ret;
} }
// If we ran out of input before the whole buffer
// was filled, return to application.
if (coder->subblock.size < coder->subblock.limit
&& action != LZMA_FINISH)
return LZMA_OK;
coder->sequence = SEQ_FLUSH; coder->sequence = SEQ_FLUSH;
}
// SEQ_FILL doesn't produce any output so falling through
// to SEQ_FLUSH is safe.
assert(*out_pos < out_size);
// Fall through // Fall through
@ -471,21 +580,33 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
break; break;
} }
write_byte(0x50); coder->options->subfilter_mode = LZMA_SUBFILTER_NONE;
coder->subfilter.mode = SUB_NONE; coder->subfilter.mode = SUB_NONE;
write_byte(0x50);
if (*out_pos == out_size) if (*out_pos == out_size)
return LZMA_OK; return LZMA_OK;
} }
// Check if we have already written everything. // Check if we have already written everything.
if (action == LZMA_FINISH && *in_pos == in_size if (action != LZMA_RUN && *in_pos == in_size
&& coder->subfilter.mode == SUB_NONE) { && (coder->subfilter.mode == SUB_NONE
|| coder->subfilter.mode == SUB_FLUSH)) {
if (coder->rle.count > 0) { if (coder->rle.count > 0) {
subblock_rle_flush(coder); subblock_rle_flush(coder);
break; break;
} }
if (coder->use_eopm) { if (action == LZMA_SYNC_FLUSH) {
if (coder->subfilter.mode == SUB_FLUSH)
coder->subfilter.mode = SUB_RUN;
coder->subfilter.mode_locked = false;
coder->sequence = SEQ_FILL;
} else if (coder->use_eopm) {
assert(action == LZMA_FINISH);
// NOTE: No need to use write_byte() here // NOTE: No need to use write_byte() here
// since we are finishing. // since we are finishing.
out[*out_pos] = 0x10; out[*out_pos] = 0x10;
@ -586,7 +707,6 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
return LZMA_OK; return LZMA_OK;
coder->alignment.out_pos += coder->subblock.size; coder->alignment.out_pos += coder->subblock.size;
coder->subblock.size = 0; coder->subblock.size = 0;
coder->pos = 0; coder->pos = 0;
coder->sequence = SEQ_FLUSH; coder->sequence = SEQ_FLUSH;
@ -642,7 +762,12 @@ subblock_buffer(lzma_coder *coder, lzma_allocator *allocator,
coder->options->subfilter_mode = LZMA_SUBFILTER_RUN; coder->options->subfilter_mode = LZMA_SUBFILTER_RUN;
coder->subfilter.mode = SUB_RUN; coder->subfilter.mode = SUB_RUN;
coder->alignment.out_pos += coder->subfilter.flags_size;
coder->sequence = SEQ_SUBFILTER_FLAGS; coder->sequence = SEQ_SUBFILTER_FLAGS;
// It is safe to fall through because SEQ_SUBFILTER_FLAGS
// uses bufcpy() which doesn't write unless there is output
// space.
} }
// Fall through // Fall through
@ -681,7 +806,7 @@ subblock_encode(lzma_coder *coder, lzma_allocator *allocator,
out, out_pos, out_size, action); out, out_pos, out_size, action);
while (*out_pos < out_size while (*out_pos < out_size
&& (*in_pos < in_size || action == LZMA_FINISH)) { && (*in_pos < in_size || action != LZMA_RUN)) {
if (!coder->next_finished if (!coder->next_finished
&& coder->temp.pos == coder->temp.size) { && coder->temp.pos == coder->temp.size) {
coder->temp.pos = 0; coder->temp.pos = 0;
@ -692,7 +817,7 @@ subblock_encode(lzma_coder *coder, lzma_allocator *allocator,
coder->temp.buffer, &coder->temp.size, coder->temp.buffer, &coder->temp.size,
LZMA_BUFFER_SIZE, action); LZMA_BUFFER_SIZE, action);
if (ret == LZMA_STREAM_END) { if (ret == LZMA_STREAM_END) {
assert(action == LZMA_FINISH); assert(action != LZMA_RUN);
coder->next_finished = true; coder->next_finished = true;
} else if (coder->temp.size == 0 || ret != LZMA_OK) { } else if (coder->temp.size == 0 || ret != LZMA_OK) {
return ret; return ret;
@ -704,7 +829,7 @@ subblock_encode(lzma_coder *coder, lzma_allocator *allocator,
coder->temp.size, out, out_pos, out_size, coder->temp.size, out, out_pos, out_size,
coder->next_finished ? LZMA_FINISH : LZMA_RUN); coder->next_finished ? LZMA_FINISH : LZMA_RUN);
if (ret == LZMA_STREAM_END) { if (ret == LZMA_STREAM_END) {
assert(action == LZMA_FINISH); assert(action != LZMA_RUN);
assert(coder->next_finished); assert(coder->next_finished);
return LZMA_STREAM_END; return LZMA_STREAM_END;
} }
@ -765,6 +890,7 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
next->coder->subblock.size = 0; next->coder->subblock.size = 0;
next->coder->rle.count = 0; next->coder->rle.count = 0;
next->coder->subfilter.mode = SUB_NONE; next->coder->subfilter.mode = SUB_NONE;
next->coder->subfilter.mode_locked = false;
next->coder->temp.pos = 0; next->coder->temp.pos = 0;
next->coder->temp.size = 0; next->coder->temp.size = 0;
@ -781,10 +907,13 @@ lzma_subblock_encoder_init(lzma_next_coder *next, lzma_allocator *allocator,
} }
next->coder->alignment.multiple next->coder->alignment.multiple
= next->coder->options->alignment; = next->coder->options->alignment;
next->coder->subfilter.allow
= next->coder->options->allow_subfilters;
subblock_size_limit = next->coder->options->subblock_data_size; subblock_size_limit = next->coder->options->subblock_data_size;
} else { } else {
next->coder->alignment.multiple next->coder->alignment.multiple
= LZMA_SUBBLOCK_ALIGNMENT_DEFAULT; = LZMA_SUBBLOCK_ALIGNMENT_DEFAULT;
next->coder->subfilter.allow = false;
subblock_size_limit = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT; subblock_size_limit = LZMA_SUBBLOCK_DATA_SIZE_DEFAULT;
} }