///////////////////////////////////////////////////////////////////////////////
//
/// \file       file_info.c
/// \brief      Decode .xz file information into a lzma_index structure
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////

#include "index_decoder.h"


typedef struct {
|
||
|
enum {
|
||
|
SEQ_MAGIC_BYTES,
|
||
|
SEQ_PADDING_SEEK,
|
||
|
SEQ_PADDING_DECODE,
|
||
|
SEQ_FOOTER,
|
||
|
SEQ_INDEX_INIT,
|
||
|
SEQ_INDEX_DECODE,
|
||
|
SEQ_HEADER_DECODE,
|
||
|
SEQ_HEADER_COMPARE,
|
||
|
} sequence;
|
||
|
|
||
|
/// Absolute position of in[*in_pos] in the file. All code that
|
||
|
/// modifies *in_pos also updates this. seek_to_pos() needs this
|
||
|
/// to determine if we need to request the application to seek for
|
||
|
/// us or if we can do the seeking internally by adjusting *in_pos.
|
||
|
uint64_t file_cur_pos;
|
||
|
|
||
|
/// This refers to absolute positions of interesting parts of the
|
||
|
/// input file. Sometimes it points to the *beginning* of a specific
|
||
|
/// field and sometimes to the *end* of a field. The current target
|
||
|
/// position at each moment is explained in the comments.
|
||
|
uint64_t file_target_pos;
|
||
|
|
||
|
/// Size of the .xz file (from the application).
|
||
|
uint64_t file_size;
|
||
|
|
||
|
/// Index decoder
|
||
|
lzma_next_coder index_decoder;
|
||
|
|
||
|
/// Number of bytes remaining in the Index field that is currently
|
||
|
/// being decoded.
|
||
|
lzma_vli index_remaining;
|
||
|
|
||
|
/// The Index decoder will store the decoded Index in this pointer.
|
||
|
lzma_index *this_index;
|
||
|
|
||
|
/// Amount of Stream Padding in the current Stream.
|
||
|
lzma_vli stream_padding;
|
||
|
|
||
|
/// The final combined index is collected here.
|
||
|
lzma_index *combined_index;
|
||
|
|
||
|
/// Pointer from the application where to store the index information
|
||
|
/// after successful decoding.
|
||
|
lzma_index **dest_index;
|
||
|
|
||
|
/// Pointer to lzma_stream.seek_pos to be used when returning
|
||
|
/// LZMA_SEEK_NEEDED. This is set by seek_to_pos() when needed.
|
||
|
uint64_t *external_seek_pos;
|
||
|
|
||
|
/// Memory usage limit
|
||
|
uint64_t memlimit;
|
||
|
|
||
|
/// Stream Flags from the very beginning of the file.
|
||
|
lzma_stream_flags first_header_flags;
|
||
|
|
||
|
/// Stream Flags from Stream Header of the current Stream.
|
||
|
lzma_stream_flags header_flags;
|
||
|
|
||
|
/// Stream Flags from Stream Footer of the current Stream.
|
||
|
lzma_stream_flags footer_flags;
|
||
|
|
||
|
size_t temp_pos;
|
||
|
size_t temp_size;
|
||
|
uint8_t temp[8192];
|
||
|
|
||
|
} lzma_file_info_coder;
|
||
|
|
||
|
|
||
|
/// Copies data from in[*in_pos] into coder->temp until
|
||
|
/// coder->temp_pos == coder->temp_size. This also keeps coder->file_cur_pos
|
||
|
/// in sync with *in_pos. Returns true if more input is needed.
|
||
|
static bool
|
||
|
fill_temp(lzma_file_info_coder *coder, const uint8_t *restrict in,
|
||
|
size_t *restrict in_pos, size_t in_size)
|
||
|
{
|
||
|
coder->file_cur_pos += lzma_bufcpy(in, in_pos, in_size,
|
||
|
coder->temp, &coder->temp_pos, coder->temp_size);
|
||
|
return coder->temp_pos < coder->temp_size;
|
||
|
}
|
||
|
|
||
|
|
||
|
/// Seeks to the absolute file position specified by target_pos.
|
||
|
/// This tries to do the seeking by only modifying *in_pos, if possible.
|
||
|
/// The main benefit of this is that if one passes the whole file at once
|
||
|
/// to lzma_code(), the decoder will never need to return LZMA_SEEK_NEEDED
|
||
|
/// as all the seeking can be done by adjusting *in_pos in this function.
|
||
|
///
|
||
|
/// Returns true if an external seek is needed and the caller must return
|
||
|
/// LZMA_SEEK_NEEDED.
|
||
|
static bool
|
||
|
seek_to_pos(lzma_file_info_coder *coder, uint64_t target_pos,
|
||
|
size_t in_start, size_t *in_pos, size_t in_size)
|
||
|
{
|
||
|
// The input buffer doesn't extend beyond the end of the file.
|
||
|
// This has been checked by file_info_decode() already.
|
||
|
assert(coder->file_size - coder->file_cur_pos >= in_size - *in_pos);
|
||
|
|
||
|
const uint64_t pos_min = coder->file_cur_pos - (*in_pos - in_start);
|
||
|
const uint64_t pos_max = coder->file_cur_pos + (in_size - *in_pos);
|
||
|
|
||
|
bool external_seek_needed;
|
||
|
|
||
|
if (target_pos >= pos_min && target_pos <= pos_max) {
|
||
|
// The requested position is available in the current input
|
||
|
// buffer or right after it. That is, in a corner case we
|
||
|
// end up setting *in_pos == in_size and thus will immediately
|
||
|
// need new input bytes from the application.
|
||
|
*in_pos += (size_t)(target_pos - coder->file_cur_pos);
|
||
|
external_seek_needed = false;
|
||
|
} else {
|
||
|
// Ask the application to seek the input file.
|
||
|
*coder->external_seek_pos = target_pos;
|
||
|
external_seek_needed = true;
|
||
|
|
||
|
// Mark the whole input buffer as used. This way
|
||
|
// lzma_stream.total_in will have a better estimate
|
||
|
// of the amount of data read. It still won't be perfect
|
||
|
// as the value will depend on the input buffer size that
|
||
|
// the application uses, but it should be good enough for
|
||
|
// those few who want an estimate.
|
||
|
*in_pos = in_size;
|
||
|
}
|
||
|
|
||
|
// After seeking (internal or external) the current position
|
||
|
// will match the requested target position.
|
||
|
coder->file_cur_pos = target_pos;
|
||
|
|
||
|
return external_seek_needed;
|
||
|
}
|
||
|
|
||
|
|
||
|
/// The caller sets coder->file_target_pos so that it points to the *end*
|
||
|
/// of the desired file position. This function then determines how far
|
||
|
/// backwards from that position we can seek. After seeking fill_temp()
|
||
|
/// can be used to read data into coder->temp. When fill_temp() has finished,
|
||
|
/// coder->temp[coder->temp_size] will match coder->file_target_pos.
|
||
|
///
|
||
|
/// This also validates that coder->target_file_pos is sane in sense that
|
||
|
/// we aren't trying to seek too far backwards (too close or beyond the
|
||
|
/// beginning of the file).
|
||
|
static lzma_ret
|
||
|
reverse_seek(lzma_file_info_coder *coder,
|
||
|
size_t in_start, size_t *in_pos, size_t in_size)
|
||
|
{
|
||
|
// Check that there is enough data before the target position
|
||
|
// to contain at least Stream Header and Stream Footer. If there
|
||
|
// isn't, the file cannot be valid.
|
||
|
if (coder->file_target_pos < 2 * LZMA_STREAM_HEADER_SIZE)
|
||
|
return LZMA_DATA_ERROR;
|
||
|
|
||
|
coder->temp_pos = 0;
|
||
|
|
||
|
// The Stream Header at the very beginning of the file gets handled
|
||
|
// specially in SEQ_MAGIC_BYTES and thus we will never need to seek
|
||
|
// there. By not seeking to the first LZMA_STREAM_HEADER_SIZE bytes
|
||
|
// we avoid a useless external seek after SEQ_MAGIC_BYTES if the
|
||
|
// application uses an extremely small input buffer and the input
|
||
|
// file is very small.
|
||
|
if (coder->file_target_pos - LZMA_STREAM_HEADER_SIZE
|
||
|
< sizeof(coder->temp))
|
||
|
coder->temp_size = (size_t)(coder->file_target_pos
|
||
|
- LZMA_STREAM_HEADER_SIZE);
|
||
|
else
|
||
|
coder->temp_size = sizeof(coder->temp);
|
||
|
|
||
|
// The above if-statements guarantee this. This is important because
|
||
|
// the Stream Header/Footer decoders assume that there's at least
|
||
|
// LZMA_STREAM_HEADER_SIZE bytes in coder->temp.
|
||
|
assert(coder->temp_size >= LZMA_STREAM_HEADER_SIZE);
|
||
|
|
||
|
if (seek_to_pos(coder, coder->file_target_pos - coder->temp_size,
|
||
|
in_start, in_pos, in_size))
|
||
|
return LZMA_SEEK_NEEDED;
|
||
|
|
||
|
return LZMA_OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
/// Gets the number of zero-bytes at the end of the buffer.
///
/// \param      buf       Buffer to scan; it is read backwards starting
///                       from buf[buf_size - 1].
/// \param      buf_size  Number of bytes in buf. If this is zero,
///                       buf isn't accessed.
///
/// \return     Number of consecutive 0x00 bytes at the end of buf.
///             This equals buf_size if the whole buffer is zeros.
static size_t
get_padding_size(const uint8_t *buf, size_t buf_size)
{
	size_t padding = 0;
	while (buf_size > 0 && buf[--buf_size] == 0x00)
		++padding;

	return padding;
}


/// With the Stream Header at the very beginning of the file, LZMA_FORMAT_ERROR
|
||
|
/// is used to tell the application that Magic Bytes didn't match. In other
|
||
|
/// Stream Header/Footer fields (in the middle/end of the file) it could be
|
||
|
/// a bit confusing to return LZMA_FORMAT_ERROR as we already know that there
|
||
|
/// is a valid Stream Header at the beginning of the file. For those cases
|
||
|
/// this function is used to convert LZMA_FORMAT_ERROR to LZMA_DATA_ERROR.
|
||
|
static lzma_ret
|
||
|
hide_format_error(lzma_ret ret)
|
||
|
{
|
||
|
if (ret == LZMA_FORMAT_ERROR)
|
||
|
ret = LZMA_DATA_ERROR;
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
/// Calls the Index decoder and updates coder->index_remaining.
|
||
|
/// This is a separate function because the input can be either directly
|
||
|
/// from the application or from coder->temp.
|
||
|
static lzma_ret
|
||
|
decode_index(lzma_file_info_coder *coder, const lzma_allocator *allocator,
|
||
|
const uint8_t *restrict in, size_t *restrict in_pos,
|
||
|
size_t in_size, bool update_file_cur_pos)
|
||
|
{
|
||
|
const size_t in_start = *in_pos;
|
||
|
|
||
|
const lzma_ret ret = coder->index_decoder.code(
|
||
|
coder->index_decoder.coder,
|
||
|
allocator, in, in_pos, in_size,
|
||
|
NULL, NULL, 0, LZMA_RUN);
|
||
|
|
||
|
coder->index_remaining -= *in_pos - in_start;
|
||
|
|
||
|
if (update_file_cur_pos)
|
||
|
coder->file_cur_pos += *in_pos - in_start;
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
|
||
|
static lzma_ret
|
||
|
file_info_decode(void *coder_ptr, const lzma_allocator *allocator,
|
||
|
const uint8_t *restrict in, size_t *restrict in_pos,
|
||
|
size_t in_size,
|
||
|
uint8_t *restrict out lzma_attribute((__unused__)),
|
||
|
size_t *restrict out_pos lzma_attribute((__unused__)),
|
||
|
size_t out_size lzma_attribute((__unused__)),
|
||
|
lzma_action action lzma_attribute((__unused__)))
|
||
|
{
|
||
|
lzma_file_info_coder *coder = coder_ptr;
|
||
|
const size_t in_start = *in_pos;
|
||
|
|
||
|
// If the caller provides input past the end of the file, trim
|
||
|
// the extra bytes from the buffer so that we won't read too far.
|
||
|
assert(coder->file_size >= coder->file_cur_pos);
|
||
|
if (coder->file_size - coder->file_cur_pos < in_size - in_start)
|
||
|
in_size = in_start
|
||
|
+ (size_t)(coder->file_size - coder->file_cur_pos);
|
||
|
|
||
|
while (true)
|
||
|
switch (coder->sequence) {
|
||
|
case SEQ_MAGIC_BYTES:
|
||
|
// Decode the Stream Header at the beginning of the file
|
||
|
// first to check if the Magic Bytes match. The flags
|
||
|
// are stored in coder->first_header_flags so that we
|
||
|
// don't need to seek to it again.
|
||
|
//
|
||
|
// Check that the file is big enough to contain at least
|
||
|
// Stream Header.
|
||
|
if (coder->file_size < LZMA_STREAM_HEADER_SIZE)
|
||
|
return LZMA_FORMAT_ERROR;
|
||
|
|
||
|
// Read the Stream Header field into coder->temp.
|
||
|
if (fill_temp(coder, in, in_pos, in_size))
|
||
|
return LZMA_OK;
|
||
|
|
||
|
// This is the only Stream Header/Footer decoding where we
|
||
|
// want to return LZMA_FORMAT_ERROR if the Magic Bytes don't
|
||
|
// match. Elsewehere it will be converted to LZMA_DATA_ERROR.
|
||
|
return_if_error(lzma_stream_header_decode(
|
||
|
&coder->first_header_flags, coder->temp));
|
||
|
|
||
|
// Now that we know that the Magic Bytes match, check the
|
||
|
// file size. It's better to do this here after checking the
|
||
|
// Magic Bytes since this way we can give LZMA_FORMAT_ERROR
|
||
|
// instead of LZMA_DATA_ERROR when the Magic Bytes don't
|
||
|
// match in a file that is too big or isn't a multiple of
|
||
|
// four bytes.
|
||
|
if (coder->file_size > LZMA_VLI_MAX || (coder->file_size & 3))
|
||
|
return LZMA_DATA_ERROR;
|
||
|
|
||
|
// Start looking for Stream Padding and Stream Footer
|
||
|
// at the end of the file.
|
||
|
coder->file_target_pos = coder->file_size;
|
||
|
|
||
|
// Fall through
|
||
|
|
||
|
case SEQ_PADDING_SEEK:
|
||
|
coder->sequence = SEQ_PADDING_DECODE;
|
||
|
return_if_error(reverse_seek(
|
||
|
coder, in_start, in_pos, in_size));
|
||
|
|
||
|
// Fall through
|
||
|
|
||
|
case SEQ_PADDING_DECODE: {
|
||
|
// Copy to coder->temp first. This keeps the code simpler if
|
||
|
// the application only provides input a few bytes at a time.
|
||
|
if (fill_temp(coder, in, in_pos, in_size))
|
||
|
return LZMA_OK;
|
||
|
|
||
|
// Scan the buffer backwards to get the size of the
|
||
|
// Stream Padding field (if any).
|
||
|
const size_t new_padding = get_padding_size(
|
||
|
coder->temp, coder->temp_size);
|
||
|
coder->stream_padding += new_padding;
|
||
|
|
||
|
// Set the target position to the beginning of Stream Padding
|
||
|
// that has been observed so far. If all Stream Padding has
|
||
|
// been seen, then the target position will be at the end
|
||
|
// of the Stream Footer field.
|
||
|
coder->file_target_pos -= new_padding;
|
||
|
|
||
|
if (new_padding == coder->temp_size) {
|
||
|
// The whole buffer was padding. Seek backwards in
|
||
|
// the file to get more input.
|
||
|
coder->sequence = SEQ_PADDING_SEEK;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
// Size of Stream Padding must be a multiple of 4 bytes.
|
||
|
if (coder->stream_padding & 3)
|
||
|
return LZMA_DATA_ERROR;
|
||
|
|
||
|
coder->sequence = SEQ_FOOTER;
|
||
|
|
||
|
// Calculate the amount of non-padding data in coder->temp.
|
||
|
coder->temp_size -= new_padding;
|
||
|
coder->temp_pos = coder->temp_size;
|
||
|
|
||
|
// We can avoid an external seek if the whole Stream Footer
|
||
|
// is already in coder->temp. In that case SEQ_FOOTER won't
|
||
|
// read more input and will find the Stream Footer from
|
||
|
// coder->temp[coder->temp_size - LZMA_STREAM_HEADER_SIZE].
|
||
|
//
|
||
|
// Otherwise we will need to seek. The seeking is done so
|
||
|
// that Stream Footer wil be at the end of coder->temp.
|
||
|
// This way it's likely that we also get a complete Index
|
||
|
// field into coder->temp without needing a separate seek
|
||
|
// for that (unless the Index field is big).
|
||
|
if (coder->temp_size < LZMA_STREAM_HEADER_SIZE)
|
||
|
return_if_error(reverse_seek(
|
||
|
coder, in_start, in_pos, in_size));
|
||
|
}
|
||
|
|
||
|
// Fall through
|
||
|
|
||
|
case SEQ_FOOTER:
|
||
|
// Copy the Stream Footer field into coder->temp.
|
||
|
// If Stream Footer was already available in coder->temp
|
||
|
// in SEQ_PADDING_DECODE, then this does nothing.
|
||
|
if (fill_temp(coder, in, in_pos, in_size))
|
||
|
return LZMA_OK;
|
||
|
|
||
|
// Make coder->file_target_pos and coder->temp_size point
|
||
|
// to the beginning of Stream Footer and thus to the end
|
||
|
// of the Index field. coder->temp_pos will be updated
|
||
|
// a bit later.
|
||
|
coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
|
||
|
coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
|
||
|
|
||
|
// Decode Stream Footer.
|
||
|
return_if_error(hide_format_error(lzma_stream_footer_decode(
|
||
|
&coder->footer_flags,
|
||
|
coder->temp + coder->temp_size)));
|
||
|
|
||
|
// Check that we won't seek past the beginning of the file.
|
||
|
//
|
||
|
// LZMA_STREAM_HEADER_SIZE is added because there must be
|
||
|
// space for Stream Header too even though we won't seek
|
||
|
// there before decoding the Index field.
|
||
|
//
|
||
|
// There's no risk of integer overflow here because
|
||
|
// Backward Size cannot be greater than 2^34.
|
||
|
if (coder->file_target_pos < coder->footer_flags.backward_size
|
||
|
+ LZMA_STREAM_HEADER_SIZE)
|
||
|
return LZMA_DATA_ERROR;
|
||
|
|
||
|
// Set the target position to the beginning of the Index field.
|
||
|
coder->file_target_pos -= coder->footer_flags.backward_size;
|
||
|
coder->sequence = SEQ_INDEX_INIT;
|
||
|
|
||
|
// We can avoid an external seek if the whole Index field is
|
||
|
// already available in coder->temp.
|
||
|
if (coder->temp_size >= coder->footer_flags.backward_size) {
|
||
|
// Set coder->temp_pos to point to the beginning
|
||
|
// of the Index.
|
||
|
coder->temp_pos = coder->temp_size
|
||
|
- coder->footer_flags.backward_size;
|
||
|
} else {
|
||
|
// These are set to zero to indicate that there's no
|
||
|
// useful data (Index or anything else) in coder->temp.
|
||
|
coder->temp_pos = 0;
|
||
|
coder->temp_size = 0;
|
||
|
|
||
|
// Seek to the beginning of the Index field.
|
||
|
if (seek_to_pos(coder, coder->file_target_pos,
|
||
|
in_start, in_pos, in_size))
|
||
|
return LZMA_SEEK_NEEDED;
|
||
|
}
|
||
|
|
||
|
// Fall through
|
||
|
|
||
|
case SEQ_INDEX_INIT: {
|
||
|
// Calculate the amount of memory already used by the earlier
|
||
|
// Indexes so that we know how big memory limit to pass to
|
||
|
// the Index decoder.
|
||
|
//
|
||
|
// NOTE: When there are multiple Streams, the separate
|
||
|
// lzma_index structures can use more RAM (as measured by
|
||
|
// lzma_index_memused()) than the final combined lzma_index.
|
||
|
// Thus memlimit may need to be slightly higher than the final
|
||
|
// calculated memory usage will be. This is perhaps a bit
|
||
|
// confusing to the application, but I think it shouldn't
|
||
|
// cause problems in practice.
|
||
|
uint64_t memused = 0;
|
||
|
if (coder->combined_index != NULL) {
|
||
|
memused = lzma_index_memused(coder->combined_index);
|
||
|
assert(memused <= coder->memlimit);
|
||
|
if (memused > coder->memlimit) // Extra sanity check
|
||
|
return LZMA_PROG_ERROR;
|
||
|
}
|
||
|
|
||
|
// Initialize the Index decoder.
|
||
|
return_if_error(lzma_index_decoder_init(
|
||
|
&coder->index_decoder, allocator,
|
||
|
&coder->this_index,
|
||
|
coder->memlimit - memused));
|
||
|
|
||
|
coder->index_remaining = coder->footer_flags.backward_size;
|
||
|
coder->sequence = SEQ_INDEX_DECODE;
|
||
|
}
|
||
|
|
||
|
// Fall through
|
||
|
|
||
|
case SEQ_INDEX_DECODE: {
|
||
|
// Decode (a part of) the Index. If the whole Index is already
|
||
|
// in coder->temp, read it from there. Otherwise read from
|
||
|
// in[*in_pos] onwards. Note that index_decode() updates
|
||
|
// coder->index_remaining and optionally coder->file_cur_pos.
|
||
|
lzma_ret ret;
|
||
|
if (coder->temp_size != 0) {
|
||
|
assert(coder->temp_size - coder->temp_pos
|
||
|
== coder->index_remaining);
|
||
|
ret = decode_index(coder, allocator, coder->temp,
|
||
|
&coder->temp_pos, coder->temp_size,
|
||
|
false);
|
||
|
} else {
|
||
|
// Don't give the decoder more input than the known
|
||
|
// remaining size of the Index field.
|
||
|
size_t in_stop = in_size;
|
||
|
if (in_size - *in_pos > coder->index_remaining)
|
||
|
in_stop = *in_pos
|
||
|
+ (size_t)(coder->index_remaining);
|
||
|
|
||
|
ret = decode_index(coder, allocator,
|
||
|
in, in_pos, in_stop, true);
|
||
|
}
|
||
|
|
||
|
switch (ret) {
|
||
|
case LZMA_OK:
|
||
|
// If the Index docoder asks for more input when we
|
||
|
// have already given it as much input as Backward Size
|
||
|
// indicated, the file is invalid.
|
||
|
if (coder->index_remaining == 0)
|
||
|
return LZMA_DATA_ERROR;
|
||
|
|
||
|
// We cannot get here if we were reading Index from
|
||
|
// coder->temp because when reading from coder->temp
|
||
|
// we give the Index decoder exactly
|
||
|
// coder->index_remaining bytes of input.
|
||
|
assert(coder->temp_size == 0);
|
||
|
|
||
|
return LZMA_OK;
|
||
|
|
||
|
case LZMA_STREAM_END:
|
||
|
// If the decoding seems to be successful, check also
|
||
|
// that the Index decoder consumed as much input as
|
||
|
// indicated by the Backward Size field.
|
||
|
if (coder->index_remaining != 0)
|
||
|
return LZMA_DATA_ERROR;
|
||
|
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
// Calculate how much the Index tells us to seek backwards
|
||
|
// (relative to the beginning of the Index): Total size of
|
||
|
// all Blocks plus the size of the Stream Header field.
|
||
|
// No integer overflow here because lzma_index_total_size()
|
||
|
// cannot return a value greater than LZMA_VLI_MAX.
|
||
|
const uint64_t seek_amount
|
||
|
= lzma_index_total_size(coder->this_index)
|
||
|
+ LZMA_STREAM_HEADER_SIZE;
|
||
|
|
||
|
// Check that Index is sane in sense that seek_amount won't
|
||
|
// make us seek past the beginning of the file when locating
|
||
|
// the Stream Header.
|
||
|
//
|
||
|
// coder->file_target_pos still points to the beginning of
|
||
|
// the Index field.
|
||
|
if (coder->file_target_pos < seek_amount)
|
||
|
return LZMA_DATA_ERROR;
|
||
|
|
||
|
// Set the target to the beginning of Stream Header.
|
||
|
coder->file_target_pos -= seek_amount;
|
||
|
|
||
|
if (coder->file_target_pos == 0) {
|
||
|
// We would seek to the beginning of the file, but
|
||
|
// since we already decoded that Stream Header in
|
||
|
// SEQ_MAGIC_BYTES, we can use the cached value from
|
||
|
// coder->first_header_flags to avoid the seek.
|
||
|
coder->header_flags = coder->first_header_flags;
|
||
|
coder->sequence = SEQ_HEADER_COMPARE;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
coder->sequence = SEQ_HEADER_DECODE;
|
||
|
|
||
|
// Make coder->file_target_pos point to the end of
|
||
|
// the Stream Header field.
|
||
|
coder->file_target_pos += LZMA_STREAM_HEADER_SIZE;
|
||
|
|
||
|
// If coder->temp_size is non-zero, it points to the end
|
||
|
// of the Index field. Then the beginning of the Index
|
||
|
// field is at coder->temp[coder->temp_size
|
||
|
// - coder->footer_flags.backward_size].
|
||
|
assert(coder->temp_size == 0 || coder->temp_size
|
||
|
>= coder->footer_flags.backward_size);
|
||
|
|
||
|
// If coder->temp contained the whole Index, see if it has
|
||
|
// enough data to contain also the Stream Header. If so,
|
||
|
// we avoid an external seek.
|
||
|
//
|
||
|
// NOTE: This can happen only with small .xz files and only
|
||
|
// for the non-first Stream as the Stream Flags of the first
|
||
|
// Stream are cached and already handled a few lines above.
|
||
|
// So this isn't as useful as the other seek-avoidance cases.
|
||
|
if (coder->temp_size != 0 && coder->temp_size
|
||
|
- coder->footer_flags.backward_size
|
||
|
>= seek_amount) {
|
||
|
// Make temp_pos and temp_size point to the *end* of
|
||
|
// Stream Header so that SEQ_HEADER_DECODE will find
|
||
|
// the start of Stream Header from coder->temp[
|
||
|
// coder->temp_size - LZMA_STREAM_HEADER_SIZE].
|
||
|
coder->temp_pos = coder->temp_size
|
||
|
- coder->footer_flags.backward_size
|
||
|
- seek_amount
|
||
|
+ LZMA_STREAM_HEADER_SIZE;
|
||
|
coder->temp_size = coder->temp_pos;
|
||
|
} else {
|
||
|
// Seek so that Stream Header will be at the end of
|
||
|
// coder->temp. With typical multi-Stream files we
|
||
|
// will usually also get the Stream Footer and Index
|
||
|
// of the *previous* Stream in coder->temp and thus
|
||
|
// won't need a separate seek for them.
|
||
|
return_if_error(reverse_seek(coder,
|
||
|
in_start, in_pos, in_size));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Fall through
|
||
|
|
||
|
case SEQ_HEADER_DECODE:
|
||
|
// Copy the Stream Header field into coder->temp.
|
||
|
// If Stream Header was already available in coder->temp
|
||
|
// in SEQ_INDEX_DECODE, then this does nothing.
|
||
|
if (fill_temp(coder, in, in_pos, in_size))
|
||
|
return LZMA_OK;
|
||
|
|
||
|
// Make all these point to the beginning of Stream Header.
|
||
|
coder->file_target_pos -= LZMA_STREAM_HEADER_SIZE;
|
||
|
coder->temp_size -= LZMA_STREAM_HEADER_SIZE;
|
||
|
coder->temp_pos = coder->temp_size;
|
||
|
|
||
|
// Decode the Stream Header.
|
||
|
return_if_error(hide_format_error(lzma_stream_header_decode(
|
||
|
&coder->header_flags,
|
||
|
coder->temp + coder->temp_size)));
|
||
|
|
||
|
coder->sequence = SEQ_HEADER_COMPARE;
|
||
|
|
||
|
// Fall through
|
||
|
|
||
|
case SEQ_HEADER_COMPARE:
|
||
|
// Compare Stream Header against Stream Footer. They must
|
||
|
// match.
|
||
|
return_if_error(lzma_stream_flags_compare(
|
||
|
&coder->header_flags, &coder->footer_flags));
|
||
|
|
||
|
// Store the decoded Stream Flags into the Index. Use the
|
||
|
// Footer Flags because it contains Backward Size, although
|
||
|
// it shouldn't matter in practice.
|
||
|
if (lzma_index_stream_flags(coder->this_index,
|
||
|
&coder->footer_flags) != LZMA_OK)
|
||
|
return LZMA_PROG_ERROR;
|
||
|
|
||
|
// Store also the size of the Stream Padding field. It is
|
||
|
// needed to calculate the offsets of the Streams correctly.
|
||
|
if (lzma_index_stream_padding(coder->this_index,
|
||
|
coder->stream_padding) != LZMA_OK)
|
||
|
return LZMA_PROG_ERROR;
|
||
|
|
||
|
// Reset it so that it's ready for the next Stream.
|
||
|
coder->stream_padding = 0;
|
||
|
|
||
|
// Append the earlier decoded Indexes after this_index.
|
||
|
if (coder->combined_index != NULL)
|
||
|
return_if_error(lzma_index_cat(coder->this_index,
|
||
|
coder->combined_index, allocator));
|
||
|
|
||
|
coder->combined_index = coder->this_index;
|
||
|
coder->this_index = NULL;
|
||
|
|
||
|
// If the whole file was decoded, tell the caller that we
|
||
|
// are finished.
|
||
|
if (coder->file_target_pos == 0) {
|
||
|
// The combined index must indicate the same file
|
||
|
// size as was told to us at initialization.
|
||
|
assert(lzma_index_file_size(coder->combined_index)
|
||
|
== coder->file_size);
|
||
|
|
||
|
// Make the combined index available to
|
||
|
// the application.
|
||
|
*coder->dest_index = coder->combined_index;
|
||
|
coder->combined_index = NULL;
|
||
|
|
||
|
// Mark the input buffer as used since we may have
|
||
|
// done internal seeking and thus don't know how
|
||
|
// many input bytes were actually used. This way
|
||
|
// lzma_stream.total_in gets a slightly better
|
||
|
// estimate of the amount of input used.
|
||
|
*in_pos = in_size;
|
||
|
return LZMA_STREAM_END;
|
||
|
}
|
||
|
|
||
|
// We didn't hit the beginning of the file yet, so continue
|
||
|
// reading backwards in the file. If we have unprocessed
|
||
|
// data in coder->temp, use it before requesting more data
|
||
|
// from the application.
|
||
|
//
|
||
|
// coder->file_target_pos, coder->temp_size, and
|
||
|
// coder->temp_pos all point to the beginning of Stream Header
|
||
|
// and thus the end of the previous Stream in the file.
|
||
|
coder->sequence = coder->temp_size > 0
|
||
|
? SEQ_PADDING_DECODE : SEQ_PADDING_SEEK;
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
assert(0);
|
||
|
return LZMA_PROG_ERROR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
static lzma_ret
|
||
|
file_info_decoder_memconfig(void *coder_ptr, uint64_t *memusage,
|
||
|
uint64_t *old_memlimit, uint64_t new_memlimit)
|
||
|
{
|
||
|
lzma_file_info_coder *coder = coder_ptr;
|
||
|
|
||
|
// The memory usage calculation comes from three things:
|
||
|
//
|
||
|
// (1) The Indexes that have already been decoded and processed into
|
||
|
// coder->combined_index.
|
||
|
//
|
||
|
// (2) The latest Index in coder->this_index that has been decoded but
|
||
|
// not yet put into coder->combined_index.
|
||
|
//
|
||
|
// (3) The latest Index that we have started decoding but haven't
|
||
|
// finished and thus isn't available in coder->this_index yet.
|
||
|
// Memory usage and limit information needs to be communicated
|
||
|
// from/to coder->index_decoder.
|
||
|
//
|
||
|
// Care has to be taken to not do both (2) and (3) when calculating
|
||
|
// the memory usage.
|
||
|
uint64_t combined_index_memusage = 0;
|
||
|
uint64_t this_index_memusage = 0;
|
||
|
|
||
|
// (1) If we have already successfully decoded one or more Indexes,
|
||
|
// get their memory usage.
|
||
|
if (coder->combined_index != NULL)
|
||
|
combined_index_memusage = lzma_index_memused(
|
||
|
coder->combined_index);
|
||
|
|
||
|
// Choose between (2), (3), or neither.
|
||
|
if (coder->this_index != NULL) {
|
||
|
// (2) The latest Index is available. Use its memory usage.
|
||
|
this_index_memusage = lzma_index_memused(coder->this_index);
|
||
|
|
||
|
} else if (coder->sequence == SEQ_INDEX_DECODE) {
|
||
|
// (3) The Index decoder is activate and hasn't yet stored
|
||
|
// the new index in coder->this_index. Get the memory usage
|
||
|
// information from the Index decoder.
|
||
|
//
|
||
|
// NOTE: If the Index decoder doesn't yet know how much memory
|
||
|
// it will eventually need, it will return a tiny value here.
|
||
|
uint64_t dummy;
|
||
|
if (coder->index_decoder.memconfig(coder->index_decoder.coder,
|
||
|
&this_index_memusage, &dummy, 0)
|
||
|
!= LZMA_OK) {
|
||
|
assert(0);
|
||
|
return LZMA_PROG_ERROR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Now we know the total memory usage/requirement. If we had neither
|
||
|
// old Indexes nor a new Index, this will be zero which isn't
|
||
|
// acceptable as lzma_memusage() has to return non-zero on success
|
||
|
// and even with an empty .xz file we will end up with a lzma_index
|
||
|
// that takes some memory.
|
||
|
*memusage = combined_index_memusage + this_index_memusage;
|
||
|
if (*memusage == 0)
|
||
|
*memusage = lzma_index_memusage(1, 0);
|
||
|
|
||
|
*old_memlimit = coder->memlimit;
|
||
|
|
||
|
// If requested, set a new memory usage limit.
|
||
|
if (new_memlimit != 0) {
|
||
|
if (new_memlimit < *memusage)
|
||
|
return LZMA_MEMLIMIT_ERROR;
|
||
|
|
||
|
// In the condition (3) we need to tell the Index decoder
|
||
|
// its new memory usage limit.
|
||
|
if (coder->this_index == NULL
|
||
|
&& coder->sequence == SEQ_INDEX_DECODE) {
|
||
|
const uint64_t idec_new_memlimit = new_memlimit
|
||
|
- combined_index_memusage;
|
||
|
|
||
|
assert(this_index_memusage > 0);
|
||
|
assert(idec_new_memlimit > 0);
|
||
|
|
||
|
uint64_t dummy1;
|
||
|
uint64_t dummy2;
|
||
|
|
||
|
if (coder->index_decoder.memconfig(
|
||
|
coder->index_decoder.coder,
|
||
|
&dummy1, &dummy2, idec_new_memlimit)
|
||
|
!= LZMA_OK) {
|
||
|
assert(0);
|
||
|
return LZMA_PROG_ERROR;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
coder->memlimit = new_memlimit;
|
||
|
}
|
||
|
|
||
|
return LZMA_OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
static void
|
||
|
file_info_decoder_end(void *coder_ptr, const lzma_allocator *allocator)
|
||
|
{
|
||
|
lzma_file_info_coder *coder = coder_ptr;
|
||
|
|
||
|
lzma_next_end(&coder->index_decoder, allocator);
|
||
|
lzma_index_end(coder->this_index, allocator);
|
||
|
lzma_index_end(coder->combined_index, allocator);
|
||
|
|
||
|
lzma_free(coder, allocator);
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
static lzma_ret
|
||
|
lzma_file_info_decoder_init(lzma_next_coder *next,
|
||
|
const lzma_allocator *allocator, uint64_t *seek_pos,
|
||
|
lzma_index **dest_index,
|
||
|
uint64_t memlimit, uint64_t file_size)
|
||
|
{
|
||
|
lzma_next_coder_init(&lzma_file_info_decoder_init, next, allocator);
|
||
|
|
||
|
if (dest_index == NULL)
|
||
|
return LZMA_PROG_ERROR;
|
||
|
|
||
|
lzma_file_info_coder *coder = next->coder;
|
||
|
if (coder == NULL) {
|
||
|
coder = lzma_alloc(sizeof(lzma_file_info_coder), allocator);
|
||
|
if (coder == NULL)
|
||
|
return LZMA_MEM_ERROR;
|
||
|
|
||
|
next->coder = coder;
|
||
|
next->code = &file_info_decode;
|
||
|
next->end = &file_info_decoder_end;
|
||
|
next->memconfig = &file_info_decoder_memconfig;
|
||
|
|
||
|
coder->index_decoder = LZMA_NEXT_CODER_INIT;
|
||
|
coder->this_index = NULL;
|
||
|
coder->combined_index = NULL;
|
||
|
}
|
||
|
|
||
|
coder->sequence = SEQ_MAGIC_BYTES;
|
||
|
coder->file_cur_pos = 0;
|
||
|
coder->file_target_pos = 0;
|
||
|
coder->file_size = file_size;
|
||
|
|
||
|
lzma_index_end(coder->this_index, allocator);
|
||
|
coder->this_index = NULL;
|
||
|
|
||
|
lzma_index_end(coder->combined_index, allocator);
|
||
|
coder->combined_index = NULL;
|
||
|
|
||
|
coder->stream_padding = 0;
|
||
|
|
||
|
coder->dest_index = dest_index;
|
||
|
coder->external_seek_pos = seek_pos;
|
||
|
|
||
|
// If memlimit is 0, make it 1 to ensure that lzma_memlimit_get()
|
||
|
// won't return 0 (which would indicate an error).
|
||
|
coder->memlimit = my_max(1, memlimit);
|
||
|
|
||
|
// Preprare thse for reading the first Stream Header into coder->temp.
|
||
|
coder->temp_pos = 0;
|
||
|
coder->temp_size = LZMA_STREAM_HEADER_SIZE;
|
||
|
|
||
|
return LZMA_OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
extern LZMA_API(lzma_ret)
|
||
|
lzma_file_info_decoder(lzma_stream *strm, lzma_index **dest_index,
|
||
|
uint64_t memlimit, uint64_t file_size)
|
||
|
{
|
||
|
lzma_next_strm_init(lzma_file_info_decoder_init, strm, &strm->seek_pos,
|
||
|
dest_index, memlimit, file_size);
|
||
|
|
||
|
// We allow LZMA_FINISH in addition to LZMA_RUN for convenience.
|
||
|
// lzma_code() is able to handle the LZMA_FINISH + LZMA_SEEK_NEEDED
|
||
|
// combination in a sane way. Applications still need to be careful
|
||
|
// if they use LZMA_FINISH so that they remember to reset it back
|
||
|
// to LZMA_RUN after seeking if needed.
|
||
|
strm->internal->supported_actions[LZMA_RUN] = true;
|
||
|
strm->internal->supported_actions[LZMA_FINISH] = true;
|
||
|
|
||
|
return LZMA_OK;
|
||
|
}
|