///////////////////////////////////////////////////////////////////////////////
//
/// \file       list.c
/// \brief      Listing information about .xz files
//
//  Author:     Lasse Collin
//
//  This file has been put into the public domain.
//  You can do whatever you want with this file.
//
///////////////////////////////////////////////////////////////////////////////
#include "private.h"
#include "tuklib_integer.h"
/// Running totals over all successfully listed files. They are printed
/// after the last file if more than one file was listed (always in
/// --robot mode). The "files" counter doubles as the file number shown
/// by print_info_adv().
static struct {
	uint64_t files;
	uint64_t streams;
	uint64_t blocks;
	uint64_t compressed_size;
	uint64_t uncompressed_size;

	/// Bit mask of the Check IDs seen so far
	uint32_t checks;
} totals = {
	.files = 0,
	.streams = 0,
	.blocks = 0,
	.compressed_size = 0,
	.uncompressed_size = 0,
	.checks = 0,
};
/// \brief      Parse the Index(es) from the given .xz file
///
/// The file is processed backwards, starting from its end. For each Stream
/// the Stream Padding is skipped, the Stream Footer and the Index are
/// decoded, and the Stream Header is read and compared against the Stream
/// Footer. The Indexes of all Streams are concatenated into one lzma_index
/// in file order.
///
/// \param      idx     If decoding is successful, *idx will be set to point
///                     to lzma_index containing the decoded information.
///                     On error, *idx is not modified.
/// \param      pair    Input file
///
/// \return     On success, false is returned. On error, true is returned
///             and an error message has been displayed.
///
// TODO: This function is pretty big. liblzma should have a function that
// takes a callback function to parse the Index(es) from a .xz file to make
// it easy for applications.
static bool
parse_indexes(lzma_index **idx, file_pair *pair)
{
	if (pair->src_st.st_size <= 0) {
		message_error(_("%s: File is empty"), pair->src_name);
		return true;
	}

	if (pair->src_st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
		message_error(_("%s: Too small to be a valid .xz file"),
				pair->src_name);
		return true;
	}

	io_buf buf;
	lzma_stream_flags header_flags;
	lzma_stream_flags footer_flags;
	lzma_ret ret;

	// lzma_stream for the Index decoder
	lzma_stream strm = LZMA_STREAM_INIT;

	// All Indexes decoded so far
	lzma_index *combined_index = NULL;

	// The Index currently being decoded
	lzma_index *this_index = NULL;

	// Current position in the file. We parse the file backwards so
	// initialize it to point to the end of the file.
	off_t pos = pair->src_st.st_size;

	// Each loop iteration decodes one Index.
	do {
		// Check that there is enough data left to contain at least
		// the Stream Header and Stream Footer. This check cannot
		// fail in the first pass of this loop.
		if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
			message_error("%s: %s", pair->src_name,
					message_strm(LZMA_DATA_ERROR));
			goto error;
		}

		pos -= LZMA_STREAM_HEADER_SIZE;
		lzma_vli stream_padding = 0;

		// Locate the Stream Footer. There may be Stream Padding which
		// we must skip when reading backwards.
		while (true) {
			if (pos < LZMA_STREAM_HEADER_SIZE) {
				message_error("%s: %s", pair->src_name,
						message_strm(
							LZMA_DATA_ERROR));
				goto error;
			}

			if (io_pread(pair, &buf,
					LZMA_STREAM_HEADER_SIZE, pos))
				goto error;

			// Stream Padding is always a multiple of four bytes.
			int i = 2;
			if (buf.u32[i] != 0)
				break;

			// To avoid calling io_pread() for every four bytes
			// of Stream Padding, take advantage that we read
			// 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
			// check them too before calling io_pread() again.
			do {
				stream_padding += 4;
				pos -= 4;
				--i;
			} while (i >= 0 && buf.u32[i] == 0);
		}

		// Decode the Stream Footer.
		ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		// Check that the size of the Index field looks sane.
		lzma_vli index_size = footer_flags.backward_size;
		if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
			message_error("%s: %s", pair->src_name,
					message_strm(LZMA_DATA_ERROR));
			goto error;
		}

		// Set pos to the beginning of the Index.
		pos -= index_size;

		// See how much memory we can use for decoding this Index.
		// The memory already used by the earlier Indexes is
		// subtracted from the limit.
		uint64_t memlimit = hardware_memlimit_get();
		uint64_t memused = 0;
		if (combined_index != NULL) {
			memused = lzma_index_memused(combined_index);
			if (memused > memlimit)
				message_bug();

			memlimit -= memused;
		}

		// Decode the Index.
		ret = lzma_index_decoder(&strm, &this_index, memlimit);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		do {
			// Don't give the decoder more input than the
			// Index size.
			strm.avail_in = MIN(IO_BUFFER_SIZE, index_size);
			if (io_pread(pair, &buf, strm.avail_in, pos))
				goto error;

			pos += strm.avail_in;
			index_size -= strm.avail_in;

			strm.next_in = buf.u8;
			ret = lzma_code(&strm, LZMA_RUN);

		} while (ret == LZMA_OK);

		// If the decoding seems to be successful, check also that
		// the Index decoder consumed as much input as indicated
		// by the Backward Size field.
		if (ret == LZMA_STREAM_END
				&& (index_size != 0 || strm.avail_in != 0))
			ret = LZMA_DATA_ERROR;

		if (ret != LZMA_STREAM_END) {
			// LZMA_BUFFER_ERROR means that the Index decoder
			// would have liked more input than what the Index
			// size should be according to Stream Footer.
			// The message for LZMA_DATA_ERROR makes more
			// sense in that case.
			if (ret == LZMA_BUF_ERROR)
				ret = LZMA_DATA_ERROR;

			message_error("%s: %s", pair->src_name,
					message_strm(ret));

			// If the error was too low memory usage limit,
			// show also how much memory would have been needed.
			// The sum saturates at UINT64_MAX instead of
			// wrapping around.
			if (ret == LZMA_MEMLIMIT_ERROR) {
				uint64_t needed = lzma_memusage(&strm);
				if (UINT64_MAX - needed < memused)
					needed = UINT64_MAX;
				else
					needed += memused;

				message_mem_needed(V_ERROR, needed);
			}

			goto error;
		}

		// Decode the Stream Header and check that its Stream Flags
		// match the Stream Footer. At this point pos is at the end
		// of the Index; first move it back over the Index and over
		// the size of one Stream Header, then over the Blocks.
		pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
		if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
			message_error("%s: %s", pair->src_name,
					message_strm(LZMA_DATA_ERROR));
			goto error;
		}

		pos -= lzma_index_total_size(this_index);
		if (io_pread(pair, &buf, LZMA_STREAM_HEADER_SIZE, pos))
			goto error;

		ret = lzma_stream_header_decode(&header_flags, buf.u8);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
		if (ret != LZMA_OK) {
			message_error("%s: %s", pair->src_name,
					message_strm(ret));
			goto error;
		}

		// Store the decoded Stream Flags into this_index. This is
		// needed so that we can print which Check is used in each
		// Stream.
		ret = lzma_index_stream_flags(this_index, &footer_flags);
		if (ret != LZMA_OK)
			message_bug();

		// Store also the size of the Stream Padding field. It is
		// needed to show the offsets of the Streams correctly.
		ret = lzma_index_stream_padding(this_index, stream_padding);
		if (ret != LZMA_OK)
			message_bug();

		if (combined_index != NULL) {
			// Append the earlier decoded Indexes
			// after this_index.
			ret = lzma_index_cat(
					this_index, combined_index, NULL);
			if (ret != LZMA_OK) {
				message_error("%s: %s", pair->src_name,
						message_strm(ret));
				goto error;
			}
		}

		combined_index = this_index;
		this_index = NULL;

	} while (pos > 0);

	lzma_end(&strm);

	// All OK. Make combined_index available to the caller.
	*idx = combined_index;
	return false;

error:
	// Something went wrong, free the allocated memory.
	lzma_end(&strm);
	lzma_index_end(combined_index, NULL);
	lzma_index_end(this_index, NULL);
	return true;
}
/// \brief      Get the compression ratio
///
/// The format is slightly different from the one used in message.c.
///
/// \param      compressed_size     Size of the compressed data
/// \param      uncompressed_size   Size of the uncompressed data
///
/// \return     "---" if uncompressed_size is zero or if the ratio is
///             greater than 9.999. Otherwise a pointer to a static buffer
///             holding the ratio with three decimals; the buffer is
///             overwritten by the next call.
static const char *
get_ratio(uint64_t compressed_size, uint64_t uncompressed_size)
{
	if (uncompressed_size == 0)
		return "---";

	const double ratio = (double)(compressed_size)
			/ (double)(uncompressed_size);
	if (ratio > 9.999)
		return "---";

	// "9.999" plus the terminating null character fits exactly.
	static char buf[6];
	snprintf(buf, sizeof(buf), "%.3f", ratio);
	return buf;
}
/// Names of the integrity Checks, indexed by Check ID. IDs that have no
/// name in this table are shown as "Unknown-N". Each entry has room for
/// 11 characters plus the terminating null character.
static const char check_names[LZMA_CHECK_ID_MAX + 1][12] = {
	[0] = "None",
	[1] = "CRC32",
	[2] = "Unknown-2",
	[3] = "Unknown-3",
	[4] = "CRC64",
	[5] = "Unknown-5",
	[6] = "Unknown-6",
	[7] = "Unknown-7",
	[8] = "Unknown-8",
	[9] = "Unknown-9",
	[10] = "SHA-256",
	[11] = "Unknown-11",
	[12] = "Unknown-12",
	[13] = "Unknown-13",
	[14] = "Unknown-14",
	[15] = "Unknown-15",
};
/// \brief Get a comma-separated list of Check names
///
/// \param checks Bit mask of Checks to print
/// \param space_after_comma
/// It's better to not use spaces in table-like listings,
/// but in more verbose formats a space after a comma
/// is good for readability.
static const char *
get_check_names(uint32_t checks, bool space_after_comma)
2007-12-08 17:42:33 -05:00
{
assert(checks != 0);
2007-12-08 17:42:33 -05:00
static char buf[sizeof(check_names)];
char *pos = buf;
size_t left = sizeof(buf);
2007-12-08 17:42:33 -05:00
const char *sep = space_after_comma ? ", " : ",";
bool comma = false;
2007-12-08 17:42:33 -05:00
for (size_t i = 0; i <= LZMA_CHECK_ID_MAX; ++i) {
if (checks & (UINT32_C(1) << i)) {
my_snprintf(&pos, &left, "%s%s",
comma ? sep : "", check_names[i]);
comma = true;
}
2007-12-08 17:42:33 -05:00
}
return buf;
2007-12-08 17:42:33 -05:00
}
/// \brief Read the Check value from the .xz file and print it
///
/// Since this requires a seek, listing all Check values for all Blocks can
/// be slow.
///
/// \param pair Input file
/// \param iter Location of the Block whose Check value should
/// be printed.
///
/// \return False on success, true on I/O error.
2007-12-08 17:42:33 -05:00
static bool
print_check_value(file_pair *pair, const lzma_index_iter *iter)
2007-12-08 17:42:33 -05:00
{
// Don't read anything from the file if there is no integrity Check.
if (iter->stream.flags->check == LZMA_CHECK_NONE) {
printf("---");
return false;
2007-12-08 17:42:33 -05:00
}
// Locate and read the Check field.
const uint32_t size = lzma_check_size(iter->stream.flags->check);
const off_t offset = iter->block.compressed_file_offset
+ iter->block.total_size - size;
io_buf buf;
if (io_pread(pair, &buf, size, offset))
2007-12-08 17:42:33 -05:00
return true;
// CRC32 and CRC64 are in little endian. Guess that all the future
// 32-bit and 64-bit Check values are little endian too. It shouldn't
// be a too big problem if this guess is wrong.
if (size == 4) {
printf("%08" PRIx32, conv32le(buf.u32[0]));
} else if (size == 8) {
printf("%016" PRIx64, conv64le(buf.u64[0]));
} else {
for (size_t i = 0; i < size; ++i)
printf("%02x", buf.u8[i]);
2007-12-08 17:42:33 -05:00
}
return false;
}
/// \brief      Print the one-line-per-file listing of a file
///
/// The column headings are printed before the first file only.
///
/// \param      idx     Decoded Index(es) of the file
/// \param      pair    Input file; only its name is used here
static void
print_info_basic(const lzma_index *idx, file_pair *pair)
{
	static bool headings_displayed = false;
	if (!headings_displayed) {
		headings_displayed = true;
		// TRANSLATORS: These are column titles. From Strms (Streams)
		// to Ratio, the columns are right aligned. Check and Filename
		// are left aligned. If you need longer words, it's OK to
		// use two lines here. Test with xz --list.
		puts(_("Strms Blocks Compressed Uncompressed Ratio "
				"Check Filename"));
	}

	// The last argument of uint64_to_str() and uint64_to_nicestr()
	// differs for every string that is used by this single printf()
	// call. NOTE(review): presumably it selects a separate static
	// buffer per string - confirm against the helpers.
	printf("%5s %7s %11s %11s %5s %-7s %s\n",
			uint64_to_str(lzma_index_stream_count(idx), 0),
			uint64_to_str(lzma_index_block_count(idx), 1),
			uint64_to_nicestr(lzma_index_file_size(idx),
				NICESTR_B, NICESTR_TIB, false, 2),
			uint64_to_nicestr(lzma_index_uncompressed_size(idx),
				NICESTR_B, NICESTR_TIB, false, 3),
			get_ratio(lzma_index_file_size(idx),
				lzma_index_uncompressed_size(idx)),
			get_check_names(lzma_index_checks(idx), false),
			pair->src_name);
}
/// \brief      Print the summary lines shared by the verbose listings
///
/// Used both for a single file and for the totals of all files.
///
/// \param      stream_count        Number of Streams
/// \param      block_count         Number of Blocks
/// \param      compressed_size     Compressed size in bytes
/// \param      uncompressed_size   Uncompressed size in bytes
/// \param      checks              Bit mask of the Checks in use
static void
print_adv_helper(uint64_t stream_count, uint64_t block_count,
		uint64_t compressed_size, uint64_t uncompressed_size,
		uint32_t checks)
{
	// Each value is converted and printed before the next one is
	// converted because the same buffer number (0) is passed to the
	// conversion helpers every time.
	const char *text;

	text = uint64_to_str(stream_count, 0);
	printf(_(" Stream count: %s\n"), text);

	text = uint64_to_str(block_count, 0);
	printf(_(" Block count: %s\n"), text);

	text = uint64_to_nicestr(compressed_size,
			NICESTR_B, NICESTR_TIB, true, 0);
	printf(_(" Compressed size: %s\n"), text);

	text = uint64_to_nicestr(uncompressed_size,
			NICESTR_B, NICESTR_TIB, true, 0);
	printf(_(" Uncompressed size: %s\n"), text);

	text = get_ratio(compressed_size, uncompressed_size);
	printf(_(" Ratio: %s\n"), text);

	text = get_check_names(checks, true);
	printf(_(" Check: %s\n"), text);
}
2007-12-08 17:42:33 -05:00
static void
print_info_adv(const lzma_index *idx, file_pair *pair)
{
// Print an empty line between files.
static bool first_filename_printed = false;
if (!first_filename_printed)
first_filename_printed = true;
else
putchar('\n');
// Print the filename and overall information.
printf("%s (%" PRIu64 "):\n", pair->src_name, totals.files);
print_adv_helper(lzma_index_stream_count(idx),
lzma_index_block_count(idx),
lzma_index_file_size(idx),
lzma_index_uncompressed_size(idx),
lzma_index_checks(idx));
// TODO: The rest of this function needs some work. Currently
// the offsets are not printed, which could be useful even when
// printed in a less accurate format. On the other hand, maybe
// this should print the information with exact byte values,
// or maybe there should be at least an option to do that.
//
// We could also display some other info. E.g. it could be useful
// to quickly see how big is the biggest Block (uncompressed size)
// and if all Blocks have Compressed Size and Uncompressed Size
// fields present, which can be used e.g. for multithreaded
// decompression.
// Avoid printing Stream and Block lists when they wouldn't be useful.
bool show_blocks = false;
if (lzma_index_stream_count(idx) > 1) {
puts(_(" Streams:"));
puts(_(" Number Blocks Compressed "
"Uncompressed Ratio Check"));
lzma_index_iter iter;
lzma_index_iter_init(&iter, idx);
while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
if (iter.stream.block_count > 1)
show_blocks = true;
printf(" %8s %10s %11s %11s %5s %s\n",
uint64_to_str(iter.stream.number, 0),
uint64_to_str(iter.stream.block_count, 1),
uint64_to_nicestr(
iter.stream.compressed_size,
NICESTR_B, NICESTR_TIB, false, 2),
uint64_to_nicestr(
iter.stream.uncompressed_size,
NICESTR_B, NICESTR_TIB, false, 3),
get_ratio(iter.stream.compressed_size,
iter.stream.uncompressed_size),
check_names[iter.stream.flags->check]);
}
2007-12-08 17:42:33 -05:00
}
if (show_blocks || lzma_index_block_count(idx)
> lzma_index_stream_count(idx)
|| message_verbosity_get() >= V_DEBUG) {
puts(_(" Blocks:"));
// FIXME: Number in Stream/file, which one is better?
puts(_(" Stream Number Compressed "
"Uncompressed Ratio Check"));
lzma_index_iter iter;
lzma_index_iter_init(&iter, idx);
while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
printf(" %8s %10s %11s %11s %5s %-7s",
uint64_to_str(iter.stream.number, 0),
uint64_to_str(iter.block.number_in_stream, 1),
uint64_to_nicestr(iter.block.total_size,
NICESTR_B, NICESTR_TIB, false, 2),
uint64_to_nicestr(
iter.block.uncompressed_size,
NICESTR_B, NICESTR_TIB, false, 3),
get_ratio(iter.block.total_size,
iter.block.uncompressed_size),
check_names[iter.stream.flags->check]);
if (message_verbosity_get() >= V_DEBUG)
if (print_check_value(pair, &iter))
return;
putchar('\n');
}
2007-12-08 17:42:33 -05:00
}
}
2007-12-08 17:42:33 -05:00
/// \brief      Print the machine-readable (--robot) listing of a file
///
/// Output is tab-separated, one record per line:
///   - a "file" line is always printed;
///   - at V_VERBOSE or higher, one "stream" line per Stream followed by
///     one "block" line per Block;
///   - at V_DEBUG or higher, each "block" line also gets the Check value
///     as an extra column.
///
/// \param      idx     Decoded Index(es) of the file
/// \param      pair    Input file
static void
print_info_robot(const lzma_index *idx, file_pair *pair)
{
	printf("file\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
			"\t%s\t%s\t%s\n",
			lzma_index_stream_count(idx),
			lzma_index_block_count(idx),
			lzma_index_file_size(idx),
			lzma_index_uncompressed_size(idx),
			get_ratio(lzma_index_file_size(idx),
				lzma_index_uncompressed_size(idx)),
			get_check_names(lzma_index_checks(idx), false),
			pair->src_name);

	if (message_verbosity_get() < V_VERBOSE)
		return;

	lzma_index_iter iter;
	lzma_index_iter_init(&iter, idx);

	while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_STREAM)) {
		printf("stream\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
			"\t%" PRIu64 "\t%" PRIu64
			"\t%s\t%" PRIu64 "\t%s\n",
			iter.stream.number,
			iter.stream.compressed_offset,
			iter.stream.uncompressed_offset,
			iter.stream.compressed_size,
			iter.stream.uncompressed_size,
			get_ratio(iter.stream.compressed_size,
				iter.stream.uncompressed_size),
			iter.stream.padding,
			check_names[iter.stream.flags->check]);
	}

	// Start over from the beginning to list the Blocks.
	lzma_index_iter_rewind(&iter);

	while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
		printf("block\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
			"\t%" PRIu64 "\t%" PRIu64
			"\t%" PRIu64 "\t%" PRIu64 "\t%s\t%s",
			iter.stream.number,
			iter.block.number_in_stream,
			iter.block.number_in_file,
			iter.block.compressed_file_offset,
			iter.block.uncompressed_file_offset,
			iter.block.total_size,
			iter.block.uncompressed_size,
			get_ratio(iter.block.total_size,
				iter.block.uncompressed_size),
			check_names[iter.stream.flags->check]);

		if (message_verbosity_get() >= V_DEBUG) {
			putchar('\t');
			// Stop listing this file if the Check value
			// cannot be read.
			if (print_check_value(pair, &iter))
				return;
		}

		putchar('\n');
	}
}
static void
update_totals(const lzma_index *idx)
2007-12-08 17:42:33 -05:00
{
// TODO: Integer overflow checks
++totals.files;
totals.streams += lzma_index_stream_count(idx);
totals.blocks += lzma_index_block_count(idx);
totals.compressed_size += lzma_index_file_size(idx);
totals.uncompressed_size += lzma_index_uncompressed_size(idx);
totals.checks |= lzma_index_checks(idx);
return;
}
2007-12-08 17:42:33 -05:00
/// \brief      Print the totals line of the one-line-per-file listing
///
/// Prints a separator line of 79 dashes followed by the totals in the same
/// column layout as print_info_basic() uses, with the number of files in
/// place of the filename.
static void
print_totals_basic(void)
{
	// Print a separator line.
	char line[80];
	memset(line, '-', sizeof(line));
	line[sizeof(line) - 1] = '\0';
	puts(line);

	// Print the totals except the file count, which needs
	// special handling.
	printf("%5s %7s %11s %11s %5s %-7s ",
			uint64_to_str(totals.streams, 0),
			uint64_to_str(totals.blocks, 1),
			uint64_to_nicestr(totals.compressed_size,
				NICESTR_B, NICESTR_TIB, false, 2),
			uint64_to_nicestr(totals.uncompressed_size,
				NICESTR_B, NICESTR_TIB, false, 3),
			get_ratio(totals.compressed_size,
				totals.uncompressed_size),
			get_check_names(totals.checks, false));

	// Since we print totals only when there are at least two files,
	// the English message will always use "%s files". But some other
	// languages need different forms for different plurals so we
	// have to translate this string still.
	//
	// Both forms end with a newline so that the line is terminated
	// whichever form gets selected.
	//
	// The count argument has to fit into unsigned long. For bigger
	// values a smaller number is passed instead of the real count.
	//
	// TRANSLATORS: This simply indicates the number of files shown
	// by --list even though the format string uses %s.
	printf(N_("%s file\n", "%s files\n",
			totals.files <= ULONG_MAX ? totals.files
				: (totals.files % 1000000) + 1000000),
			uint64_to_str(totals.files, 0));
}
/// \brief      Print the totals in the verbose listing format
///
/// Prints an empty line, a "Totals:" heading, the number of files, and
/// the same summary lines that are printed for each individual file.
static void
print_totals_adv(void)
{
	putchar('\n');
	puts(_("Totals:"));
	printf(_(" Number of files: %s\n"),
			uint64_to_str(totals.files, 0));
	print_adv_helper(totals.streams, totals.blocks,
			totals.compressed_size, totals.uncompressed_size,
			totals.checks);
}
/// \brief      Print the totals in the machine-readable (--robot) format
///
/// Prints a single tab-separated "totals" line: Streams, Blocks,
/// compressed size, uncompressed size, ratio, Check names, and the
/// number of files.
static void
print_totals_robot(void)
{
	printf("totals\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t%" PRIu64
			"\t%s\t%s\t%" PRIu64 "\n",
			totals.streams,
			totals.blocks,
			totals.compressed_size,
			totals.uncompressed_size,
			get_ratio(totals.compressed_size,
				totals.uncompressed_size),
			get_check_names(totals.checks, false),
			totals.files);
}
/// \brief      Print the totals after all files have been listed
///
/// In --robot mode the totals are always printed. Otherwise they are
/// printed only if more than one file was listed, using the basic or the
/// verbose format depending on the verbosity level.
extern void
list_totals(void)
{
	if (opt_robot) {
		// Always print totals in --robot mode. It can be convenient
		// in some cases and doesn't complicate usage of the
		// single-file case much.
		print_totals_robot();
		return;
	}

	// For non-robot mode, totals are printed only if there
	// is more than one file.
	if (totals.files <= 1)
		return;

	if (message_verbosity_get() <= V_WARNING)
		print_totals_basic();
	else
		print_totals_adv();
}
extern void
list_file(const char *filename)
2007-12-08 17:42:33 -05:00
{
if (opt_format != FORMAT_XZ && opt_format != FORMAT_AUTO)
message_fatal(_("--list works only on .xz files "
"(--format=xz or --format=auto)"));
2007-12-08 17:42:33 -05:00
if (strcmp(filename, "-") == 0) {
message_error(_("--list does not support reading from "
"standard input"));
2007-12-08 17:42:33 -05:00
return;
}
if (is_empty_filename(filename))
return;
// Set opt_stdout so that io_open() won't create a new file.
// Disable also sparse mode so that it doesn't remove O_APPEND
// from stdout.
opt_stdout = true;
io_no_sparse();
file_pair *pair = io_open(filename);
if (pair == NULL)
2007-12-08 17:42:33 -05:00
return;
lzma_index *idx;
if (!parse_indexes(&idx, pair)) {
// Update the totals that are displayed after all
// the individual files have been listed.
update_totals(idx);
// We have three main modes:
// - --robot, which has submodes if --verbose is specified
// once or twice
// - Normal --list without --verbose
// - --list with one or two --verbose
if (opt_robot)
print_info_robot(idx, pair);
else if (message_verbosity_get() <= V_WARNING)
print_info_basic(idx, pair);
else
print_info_adv(idx, pair);
2007-12-08 17:42:33 -05:00
lzma_index_end(idx, NULL);
2007-12-08 17:42:33 -05:00
}
io_close(pair, false);
2007-12-08 17:42:33 -05:00
return;
}