xz: Add .lz (lzip) decompression support.

If configured with --disable-lzip-decoder then --long-help will
still list `lzip' in --format but I left it like that since
due to translations it would be messy to have two help strings.
Features are disabled only in special situations so wrong help
in such a situation shouldn't matter much.

Thanks to Michał Górny for the original patch.
This commit is contained in:
Lasse Collin 2022-10-08 21:28:15 +03:00
parent 034086e1ae
commit 3176f992c5
6 changed files with 141 additions and 13 deletions

View File

@ -412,6 +412,9 @@ parse_real(args_info *args, int argc, char **argv)
{ "xz", FORMAT_XZ }, { "xz", FORMAT_XZ },
{ "lzma", FORMAT_LZMA }, { "lzma", FORMAT_LZMA },
{ "alone", FORMAT_LZMA }, { "alone", FORMAT_LZMA },
#ifdef HAVE_LZIP_DECODER
{ "lzip", FORMAT_LZIP },
#endif
// { "gzip", FORMAT_GZIP }, // { "gzip", FORMAT_GZIP },
// { "gz", FORMAT_GZIP }, // { "gz", FORMAT_GZIP },
{ "raw", FORMAT_RAW }, { "raw", FORMAT_RAW },
@ -668,6 +671,12 @@ args_parse(args_info *args, int argc, char **argv)
"at build time")); "at build time"));
#endif #endif
#ifdef HAVE_LZIP_DECODER
if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_LZIP)
message_fatal(_("Compression of lzip files (.lz) "
"is not supported"));
#endif
// Never remove the source file when the destination is not on disk. // Never remove the source file when the destination is not on disk.
// In test mode the data is written nowhere, but setting opt_stdout // In test mode the data is written nowhere, but setting opt_stdout
// will make the rest of the code behave well. // will make the rest of the code behave well.

View File

@ -51,6 +51,11 @@ static lzma_check check;
/// This becomes false if the --check=CHECK option is used. /// This becomes false if the --check=CHECK option is used.
static bool check_default = true; static bool check_default = true;
/// Indicates if unconsumed input is allowed to remain after
/// decoding has successfully finished. This is set for each file
/// in coder_init().
static bool allow_trailing_input;
#ifdef MYTHREAD_ENABLED #ifdef MYTHREAD_ENABLED
static lzma_mt mt_options = { static lzma_mt mt_options = {
.flags = 0, .flags = 0,
@ -136,6 +141,11 @@ memlimit_too_small(uint64_t memory_usage)
extern void extern void
coder_set_compression_settings(void) coder_set_compression_settings(void)
{ {
#ifdef HAVE_LZIP_DECODER
// .lz compression isn't supported.
assert(opt_format != FORMAT_LZIP);
#endif
// The default check type is CRC64, but fallback to CRC32 // The default check type is CRC64, but fallback to CRC32
// if CRC64 isn't supported by the copy of liblzma we are // if CRC64 isn't supported by the copy of liblzma we are
// using. CRC32 is always supported. // using. CRC32 is always supported.
@ -470,6 +480,18 @@ is_format_lzma(void)
return true; return true;
} }
#ifdef HAVE_LZIP_DECODER
/// Check whether the input currently held in in_buf starts with the
/// .lz magic bytes. If strm.avail_in is smaller than the magic, the
/// result is false and in_buf is not examined.
static bool
is_format_lzip(void)
{
	// The bytes 0x4C 0x5A 0x49 0x50 spell "LZIP" in ASCII.
	static const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };

	if (strm.avail_in < sizeof(lzip_magic))
		return false;

	return memcmp(in_buf.u8, lzip_magic, sizeof(lzip_magic)) == 0;
}
#endif
#endif #endif
@ -483,6 +505,12 @@ coder_init(file_pair *pair)
{ {
lzma_ret ret = LZMA_PROG_ERROR; lzma_ret ret = LZMA_PROG_ERROR;
// In most cases if there is input left when coding finishes,
// something has gone wrong. Exceptions are --single-stream
// and decoding .lz files which can contain trailing non-.lz data.
// These will be handled later in this function.
allow_trailing_input = false;
if (opt_mode == MODE_COMPRESS) { if (opt_mode == MODE_COMPRESS) {
#ifdef HAVE_ENCODERS #ifdef HAVE_ENCODERS
switch (opt_format) { switch (opt_format) {
@ -506,6 +534,14 @@ coder_init(file_pair *pair)
ret = lzma_alone_encoder(&strm, filters[0].options); ret = lzma_alone_encoder(&strm, filters[0].options);
break; break;
# ifdef HAVE_LZIP_DECODER
case FORMAT_LZIP:
// args.c should disallow this.
assert(0);
ret = LZMA_PROG_ERROR;
break;
# endif
case FORMAT_RAW: case FORMAT_RAW:
ret = lzma_raw_encoder(&strm, filters); ret = lzma_raw_encoder(&strm, filters);
break; break;
@ -522,7 +558,9 @@ coder_init(file_pair *pair)
else else
flags |= LZMA_TELL_UNSUPPORTED_CHECK; flags |= LZMA_TELL_UNSUPPORTED_CHECK;
if (!opt_single_stream) if (opt_single_stream)
allow_trailing_input = true;
else
flags |= LZMA_CONCATENATED; flags |= LZMA_CONCATENATED;
// We abuse FORMAT_AUTO to indicate unknown file format, // We abuse FORMAT_AUTO to indicate unknown file format,
@ -531,8 +569,14 @@ coder_init(file_pair *pair)
switch (opt_format) { switch (opt_format) {
case FORMAT_AUTO: case FORMAT_AUTO:
// .lz is checked before .lzma since .lzma detection
// is more complicated (no magic bytes).
if (is_format_xz()) if (is_format_xz())
init_format = FORMAT_XZ; init_format = FORMAT_XZ;
# ifdef HAVE_LZIP_DECODER
else if (is_format_lzip())
init_format = FORMAT_LZIP;
# endif
else if (is_format_lzma()) else if (is_format_lzma())
init_format = FORMAT_LZMA; init_format = FORMAT_LZMA;
break; break;
@ -547,6 +591,13 @@ coder_init(file_pair *pair)
init_format = FORMAT_LZMA; init_format = FORMAT_LZMA;
break; break;
# ifdef HAVE_LZIP_DECODER
case FORMAT_LZIP:
if (is_format_lzip())
init_format = FORMAT_LZIP;
break;
# endif
case FORMAT_RAW: case FORMAT_RAW:
init_format = FORMAT_RAW; init_format = FORMAT_RAW;
break; break;
@ -604,6 +655,15 @@ coder_init(file_pair *pair)
MODE_DECOMPRESS)); MODE_DECOMPRESS));
break; break;
# ifdef HAVE_LZIP_DECODER
case FORMAT_LZIP:
allow_trailing_input = true;
ret = lzma_lzip_decoder(&strm,
hardware_memlimit_get(
MODE_DECOMPRESS), flags);
break;
# endif
case FORMAT_RAW: case FORMAT_RAW:
// Memory usage has already been checked in // Memory usage has already been checked in
// coder_set_compression_settings(). // coder_set_compression_settings().
@ -864,7 +924,7 @@ coder_normal(file_pair *pair)
} }
if (ret == LZMA_STREAM_END) { if (ret == LZMA_STREAM_END) {
if (opt_single_stream) { if (allow_trailing_input) {
io_fix_src_pos(pair, strm.avail_in); io_fix_src_pos(pair, strm.avail_in);
success = true; success = true;
break; break;
@ -872,7 +932,9 @@ coder_normal(file_pair *pair)
// Check that there is no trailing garbage. // Check that there is no trailing garbage.
// This is needed for LZMA_Alone and raw // This is needed for LZMA_Alone and raw
// streams. // streams. This is *not* done with .lz files
// as that format specifically requires
// allowing trailing garbage.
if (strm.avail_in == 0 && !pair->src_eof) { if (strm.avail_in == 0 && !pair->src_eof) {
// Try reading one more byte. // Try reading one more byte.
// Hopefully we don't get any more // Hopefully we don't get any more

View File

@ -23,6 +23,9 @@ enum format_type {
FORMAT_AUTO, FORMAT_AUTO,
FORMAT_XZ, FORMAT_XZ,
FORMAT_LZMA, FORMAT_LZMA,
#ifdef HAVE_LZIP_DECODER
FORMAT_LZIP,
#endif
// HEADER_GZIP, // HEADER_GZIP,
FORMAT_RAW, FORMAT_RAW,
}; };

View File

@ -1150,7 +1150,7 @@ message_help(bool long_help)
puts(_("\n Basic file format and compression options:\n")); puts(_("\n Basic file format and compression options:\n"));
puts(_( puts(_(
" -F, --format=FMT file format to encode or decode; possible values are\n" " -F, --format=FMT file format to encode or decode; possible values are\n"
" `auto' (default), `xz', `lzma', and `raw'\n" " `auto' (default), `xz', `lzma', `lzip', and `raw'\n"
" -C, --check=CHECK integrity check type: `none' (use with caution),\n" " -C, --check=CHECK integrity check type: `none' (use with caution),\n"
" `crc32', `crc64' (default), or `sha256'")); " `crc32', `crc64' (default), or `sha256'"));
puts(_( puts(_(

View File

@ -119,7 +119,10 @@ uncompressed_name(const char *src_name, const size_t src_len)
#ifdef __DJGPP__ #ifdef __DJGPP__
{ ".lzm", "" }, { ".lzm", "" },
#endif #endif
{ ".tlz", ".tar" }, { ".tlz", ".tar" }, // Both .tar.lzma and .tar.lz
#ifdef HAVE_LZIP_DECODER
{ ".lz", "" },
#endif
// { ".gz", "" }, // { ".gz", "" },
// { ".tgz", ".tar" }, // { ".tgz", ".tar" },
}; };
@ -208,6 +211,15 @@ compressed_name(const char *src_name, size_t src_len)
#endif #endif
".tlz", ".tlz",
NULL NULL
#ifdef HAVE_LZIP_DECODER
// This is needed to keep the table indexing in sync with
// enum format_type from coder.h.
}, {
/*
".lz",
*/
NULL
#endif
/* /*
}, { }, {
".gz", ".gz",
@ -221,8 +233,11 @@ compressed_name(const char *src_name, size_t src_len)
} }
}; };
// args.c ensures this. // args.c ensures these.
assert(opt_format != FORMAT_AUTO); assert(opt_format != FORMAT_AUTO);
#ifdef HAVE_LZIP_DECODER
assert(opt_format != FORMAT_LZIP);
#endif
const size_t format = opt_format - 1; const size_t format = opt_format - 1;
const char *const *suffixes = all_suffixes[format]; const char *const *suffixes = all_suffixes[format];
@ -299,8 +314,11 @@ compressed_name(const char *src_name, size_t src_len)
// xz foo.tar -> foo.txz // xz foo.tar -> foo.txz
// xz -F lzma foo.tar -> foo.tlz // xz -F lzma foo.tar -> foo.tlz
static const char *const tar_suffixes[] = { static const char *const tar_suffixes[] = {
".txz", ".txz", // .tar.xz
".tlz", ".tlz", // .tar.lzma
/*
".tlz", // .tar.lz
*/
// ".tgz", // ".tgz",
}; };
suffix = tar_suffixes[format]; suffix = tar_suffixes[format];

View File

@ -5,7 +5,7 @@
.\" This file has been put into the public domain. .\" This file has been put into the public domain.
.\" You can do whatever you want with this file. .\" You can do whatever you want with this file.
.\" .\"
.TH XZ 1 "2022-11-07" "Tukaani" "XZ Utils" .TH XZ 1 "2022-11-09" "Tukaani" "XZ Utils"
. .
.SH NAME .SH NAME
xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files
@ -62,6 +62,11 @@ format, but the legacy
format used by LZMA Utils and format used by LZMA Utils and
raw compressed streams with no container format headers raw compressed streams with no container format headers
are also supported. are also supported.
In addition, decompression of the
.B .lz
format used by
.B lzip
is supported.
.PP .PP
.B xz .B xz
compresses or decompresses each compresses or decompresses each
@ -102,9 +107,10 @@ or
is appended to the source filename to get the target filename. is appended to the source filename to get the target filename.
.IP \(bu 3 .IP \(bu 3
When decompressing, the When decompressing, the
.B .xz .BR .xz ,
.BR .lzma ,
or or
.B .lzma .B .lz
suffix is removed from the filename to get the target filename. suffix is removed from the filename to get the target filename.
.B xz .B xz
also recognizes the suffixes also recognizes the suffixes
@ -158,8 +164,9 @@ doesn't have a suffix of any of the supported file formats
.RB ( .xz , .RB ( .xz ,
.BR .txz , .BR .txz ,
.BR .lzma , .BR .lzma ,
.BR .tlz ,
or or
.BR .tlz ). .BR .lz ).
.PP .PP
After successfully compressing or decompressing the After successfully compressing or decompressing the
.IR file , .IR file ,
@ -507,8 +514,9 @@ in addition to files with the
.BR .xz , .BR .xz ,
.BR .txz , .BR .txz ,
.BR .lzma , .BR .lzma ,
.BR .tlz ,
or or
.B .tlz .B .lz
suffix. suffix.
If the source file has the suffix If the source file has the suffix
.IR .suf , .IR .suf ,
@ -575,6 +583,34 @@ The alternative name
.B alone .B alone
is provided for backwards compatibility with LZMA Utils. is provided for backwards compatibility with LZMA Utils.
.TP .TP
.B lzip
Accept only
.B .lz
files when decompressing.
Compression is not supported.
.IP ""
The
.B .lz
format version 0 and the unextended version 1 are supported.
Version 0 files were produced by
.B lzip
1.3 and older.
Such files aren't common but may be found in file archives
as a few source packages were released in this format.
People might have old personal files in this format too.
Decompression support for the format version 0 was removed in
.B lzip
1.18.
.IP ""
.B lzip
1.4 and later create files in the format version 1.
The sync flush marker extension to the format version 1 was added in
.B lzip
1.6.
This extension is rarely used and isn't supported by
.B xz
(diagnosed as corrupt input).
.TP
.B raw .B raw
Compress or uncompress a raw stream (no headers). Compress or uncompress a raw stream (no headers).
This is meant for advanced users only. This is meant for advanced users only.