diff --git a/CMakeLists.txt b/CMakeLists.txt index 656d238..de42abf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -126,4 +126,7 @@ SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) add_library(lzma SHARED ${lzma_sources}) target_link_libraries(lzma system) -install(TARGETS lzma DESTINATION lib${SUFFIX}/darling) +install(TARGETS lzma DESTINATION ${CMAKE_INSTALL_LIBDIR}/darling) + +install(DIRECTORY usr DESTINATION libexec/darling) + diff --git a/usr/include/lzma.h b/usr/include/lzma.h new file mode 100644 index 0000000..ce675a7 --- /dev/null +++ b/usr/include/lzma.h @@ -0,0 +1,321 @@ +/** + * \file api/lzma.h + * \brief The public API of liblzma data compression library + * + * liblzma is a public domain general-purpose data compression library with + * a zlib-like API. The native file format is .xz, but also the old .lzma + * format and raw (no headers) streams are supported. Multiple compression + * algorithms (filters) are supported. Currently LZMA2 is the primary filter. + * + * liblzma is part of XZ Utils . XZ Utils includes + * a gzip-like command line tool named xz and some other tools. XZ Utils + * is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on Igor Pavlov's public domain LZMA SDK + * . + * + * The SHA-256 implementation is based on the public domain code found from + * 7-Zip , which has a modified version of the public + * domain SHA-256 code found from Crypto++ . + * The SHA-256 code in Crypto++ was written by Kevin Springle and Wei Dai. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef LZMA_H +#define LZMA_H + +/***************************** + * Required standard headers * + *****************************/ + +/* + * liblzma API headers need some standard types and macros. To allow + * including lzma.h without requiring the application to include other + * headers first, lzma.h includes the required standard headers unless + * they already seem to be included already or if LZMA_MANUAL_HEADERS + * has been defined. + * + * Here's what types and macros are needed and from which headers: + * - stddef.h: size_t, NULL + * - stdint.h: uint8_t, uint32_t, uint64_t, UINT32_C(n), uint64_C(n), + * UINT32_MAX, UINT64_MAX + * + * However, inttypes.h is a little more portable than stdint.h, although + * inttypes.h declares some unneeded things compared to plain stdint.h. + * + * The hacks below aren't perfect, specifically they assume that inttypes.h + * exists and that it typedefs at least uint8_t, uint32_t, and uint64_t, + * and that, in case of incomplete inttypes.h, unsigned int is 32-bit. + * If the application already takes care of setting up all the types and + * macros properly (for example by using gnulib's stdint.h or inttypes.h), + * we try to detect that the macros are already defined and don't include + * inttypes.h here again. However, you may define LZMA_MANUAL_HEADERS to + * force this file to never include any system headers. + * + * Some could argue that liblzma API should provide all the required types, + * for example lzma_uint64, LZMA_UINT64_C(n), and LZMA_UINT64_MAX. This was + * seen as an unnecessary mess, since most systems already provide all the + * necessary types and macros in the standard headers. + * + * Note that liblzma API still has lzma_bool, because using stdbool.h would + * break C89 and C++ programs on many systems. sizeof(bool) in C99 isn't + * necessarily the same as sizeof(bool) in C++. + */ + +#ifndef LZMA_MANUAL_HEADERS + /* + * I suppose this works portably also in C++. Note that in C++, + * we need to get size_t into the global namespace. + */ +# include + + /* + * Skip inttypes.h if we already have all the required macros. If we + * have the macros, we assume that we have the matching typedefs too. + */ +# if !defined(UINT32_C) || !defined(UINT64_C) \ + || !defined(UINT32_MAX) || !defined(UINT64_MAX) + /* + * MSVC versions older than 2013 have no C99 support, and + * thus they cannot be used to compile liblzma. Using an + * existing liblzma.dll with old MSVC can work though(*), + * but we need to define the required standard integer + * types here in a MSVC-specific way. + * + * (*) If you do this, the existing liblzma.dll probably uses + * a different runtime library than your MSVC-built + * application. Mixing runtimes is generally bad, but + * in this case it should work as long as you avoid + * the few rarely-needed liblzma functions that allocate + * memory and expect the caller to free it using free(). + */ +# if defined(_WIN32) && defined(_MSC_VER) && _MSC_VER < 1800 + typedef unsigned __int8 uint8_t; + typedef unsigned __int32 uint32_t; + typedef unsigned __int64 uint64_t; +# else + /* Use the standard inttypes.h. */ +# ifdef __cplusplus + /* + * C99 sections 7.18.2 and 7.18.4 specify + * that C++ implementations define the limit + * and constant macros only if specifically + * requested. Note that if you want the + * format macros (PRIu64 etc.) too, you need + * to define __STDC_FORMAT_MACROS before + * including lzma.h, since re-including + * inttypes.h with __STDC_FORMAT_MACROS + * defined doesn't necessarily work. + */ +# ifndef __STDC_LIMIT_MACROS +# define __STDC_LIMIT_MACROS 1 +# endif +# ifndef __STDC_CONSTANT_MACROS +# define __STDC_CONSTANT_MACROS 1 +# endif +# endif + +# include +# endif + + /* + * Some old systems have only the typedefs in inttypes.h, and + * lack all the macros. For those systems, we need a few more + * hacks. We assume that unsigned int is 32-bit and unsigned + * long is either 32-bit or 64-bit. If these hacks aren't + * enough, the application has to setup the types manually + * before including lzma.h. + */ +# ifndef UINT32_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT32_C(n) n ## UI32 +# else +# define UINT32_C(n) n ## U +# endif +# endif + +# ifndef UINT64_C +# if defined(_WIN32) && defined(_MSC_VER) +# define UINT64_C(n) n ## UI64 +# else + /* Get ULONG_MAX. */ +# include +# if ULONG_MAX == 4294967295UL +# define UINT64_C(n) n ## ULL +# else +# define UINT64_C(n) n ## UL +# endif +# endif +# endif + +# ifndef UINT32_MAX +# define UINT32_MAX (UINT32_C(4294967295)) +# endif + +# ifndef UINT64_MAX +# define UINT64_MAX (UINT64_C(18446744073709551615)) +# endif +# endif +#endif /* ifdef LZMA_MANUAL_HEADERS */ + + +/****************** + * LZMA_API macro * + ******************/ + +/* + * Some systems require that the functions and function pointers are + * declared specially in the headers. LZMA_API_IMPORT is for importing + * symbols and LZMA_API_CALL is to specify the calling convention. + * + * By default it is assumed that the application will link dynamically + * against liblzma. #define LZMA_API_STATIC in your application if you + * want to link against static liblzma. If you don't care about portability + * to operating systems like Windows, or at least don't care about linking + * against static liblzma on them, don't worry about LZMA_API_STATIC. That + * is, most developers will never need to use LZMA_API_STATIC. + * + * The GCC variants are a special case on Windows (Cygwin and MinGW). + * We rely on GCC doing the right thing with its auto-import feature, + * and thus don't use __declspec(dllimport). This way developers don't + * need to worry about LZMA_API_STATIC. Also the calling convention is + * omitted on Cygwin but not on MinGW. + */ +#ifndef LZMA_API_IMPORT +# if !defined(LZMA_API_STATIC) && defined(_WIN32) && !defined(__GNUC__) +# define LZMA_API_IMPORT __declspec(dllimport) +# else +# define LZMA_API_IMPORT +# endif +#endif + +#ifndef LZMA_API_CALL +# if defined(_WIN32) && !defined(__CYGWIN__) +# define LZMA_API_CALL __cdecl +# else +# define LZMA_API_CALL +# endif +#endif + +#ifndef LZMA_API +# define LZMA_API(type) LZMA_API_IMPORT type LZMA_API_CALL +#endif + + +/*********** + * nothrow * + ***********/ + +/* + * None of the functions in liblzma may throw an exception. Even + * the functions that use callback functions won't throw exceptions, + * because liblzma would break if a callback function threw an exception. + */ +#ifndef lzma_nothrow +# if defined(__cplusplus) +# define lzma_nothrow throw() +# elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 3) +# define lzma_nothrow __attribute__((__nothrow__)) +# else +# define lzma_nothrow +# endif +#endif + + +/******************** + * GNU C extensions * + ********************/ + +/* + * GNU C extensions are used conditionally in the public API. It doesn't + * break anything if these are sometimes enabled and sometimes not, only + * affects warnings and optimizations. + */ +#if __GNUC__ >= 3 +# ifndef lzma_attribute +# define lzma_attribute(attr) __attribute__(attr) +# endif + + /* warn_unused_result was added in GCC 3.4. */ +# ifndef lzma_attr_warn_unused_result +# if __GNUC__ == 3 && __GNUC_MINOR__ < 4 +# define lzma_attr_warn_unused_result +# endif +# endif + +#else +# ifndef lzma_attribute +# define lzma_attribute(attr) +# endif +#endif + + +#ifndef lzma_attr_pure +# define lzma_attr_pure lzma_attribute((__pure__)) +#endif + +#ifndef lzma_attr_const +# define lzma_attr_const lzma_attribute((__const__)) +#endif + +#ifndef lzma_attr_warn_unused_result +# define lzma_attr_warn_unused_result \ + lzma_attribute((__warn_unused_result__)) +#endif + + +/************** + * Subheaders * + **************/ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Subheaders check that this is defined. It is to prevent including + * them directly from applications. + */ +#define LZMA_H_INTERNAL 1 + +/* Basic features */ +#include "lzma/version.h" +#include "lzma/base.h" +#include "lzma/vli.h" +#include "lzma/check.h" + +/* Filters */ +#include "lzma/filter.h" +#include "lzma/bcj.h" +#include "lzma/delta.h" +#include "lzma/lzma12.h" + +/* Container formats */ +#include "lzma/container.h" + +/* Advanced features */ +#include "lzma/stream_flags.h" +#include "lzma/block.h" +#include "lzma/index.h" +#include "lzma/index_hash.h" + +/* Hardware information */ +#include "lzma/hardware.h" + +/* + * All subheaders included. Undefine LZMA_H_INTERNAL to prevent applications + * re-including the subheaders. + */ +#undef LZMA_H_INTERNAL + +#ifdef __cplusplus +} +#endif + +#endif /* ifndef LZMA_H */ diff --git a/usr/include/lzma/base.h b/usr/include/lzma/base.h new file mode 100644 index 0000000..7a31a42 --- /dev/null +++ b/usr/include/lzma/base.h @@ -0,0 +1,654 @@ +/** + * \file lzma/base.h + * \brief Data types and functions used in many places in liblzma API + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Boolean + * + * This is here because C89 doesn't have stdbool.h. To set a value for + * variables having type lzma_bool, you can use + * - C99's `true' and `false' from stdbool.h; + * - C++'s internal `true' and `false'; or + * - integers one (true) and zero (false). + */ +typedef unsigned char lzma_bool; + + +/** + * \brief Type of reserved enumeration variable in structures + * + * To avoid breaking library ABI when new features are added, several + * structures contain extra variables that may be used in future. Since + * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may + * even vary depending on the range of enumeration constants, we specify + * a separate type to be used for reserved enumeration variables. All + * enumeration constants in liblzma API will be non-negative and less + * than 128, which should guarantee that the ABI won't break even when + * new constants are added to existing enumerations. + */ +typedef enum { + LZMA_RESERVED_ENUM = 0 +} lzma_reserved_enum; + + +/** + * \brief Return values used by several functions in liblzma + * + * Check the descriptions of specific functions to find out which return + * values they can return. With some functions the return values may have + * more specific meanings than described here; those differences are + * described per-function basis. + */ +typedef enum { + LZMA_OK = 0, + /**< + * \brief Operation completed successfully + */ + + LZMA_STREAM_END = 1, + /**< + * \brief End of stream was reached + * + * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or + * LZMA_FINISH was finished. In decoder, this indicates + * that all the data was successfully decoded. + * + * In all cases, when LZMA_STREAM_END is returned, the last + * output bytes should be picked from strm->next_out. + */ + + LZMA_NO_CHECK = 2, + /**< + * \brief Input stream has no integrity check + * + * This return value can be returned only if the + * LZMA_TELL_NO_CHECK flag was used when initializing + * the decoder. LZMA_NO_CHECK is just a warning, and + * the decoding can be continued normally. + * + * It is possible to call lzma_get_check() immediately after + * lzma_code has returned LZMA_NO_CHECK. The result will + * naturally be LZMA_CHECK_NONE, but the possibility to call + * lzma_get_check() may be convenient in some applications. + */ + + LZMA_UNSUPPORTED_CHECK = 3, + /**< + * \brief Cannot calculate the integrity check + * + * The usage of this return value is different in encoders + * and decoders. + * + * Encoders can return this value only from the initialization + * function. If initialization fails with this value, the + * encoding cannot be done, because there's no way to produce + * output with the correct integrity check. + * + * Decoders can return this value only from lzma_code() and + * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when + * initializing the decoder. The decoding can still be + * continued normally even if the check type is unsupported, + * but naturally the check will not be validated, and possible + * errors may go undetected. + * + * With decoder, it is possible to call lzma_get_check() + * immediately after lzma_code() has returned + * LZMA_UNSUPPORTED_CHECK. This way it is possible to find + * out what the unsupported Check ID was. + */ + + LZMA_GET_CHECK = 4, + /**< + * \brief Integrity check type is now available + * + * This value can be returned only by the lzma_code() function + * and only if the decoder was initialized with the + * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the + * application that it may now call lzma_get_check() to find + * out the Check ID. This can be used, for example, to + * implement a decoder that accepts only files that have + * strong enough integrity check. + */ + + LZMA_MEM_ERROR = 5, + /**< + * \brief Cannot allocate memory + * + * Memory allocation failed, or the size of the allocation + * would be greater than SIZE_MAX. + * + * Due to internal implementation reasons, the coding cannot + * be continued even if more memory were made available after + * LZMA_MEM_ERROR. + */ + + LZMA_MEMLIMIT_ERROR = 6, + /** + * \brief Memory usage limit was reached + * + * Decoder would need more memory than allowed by the + * specified memory usage limit. To continue decoding, + * the memory usage limit has to be increased with + * lzma_memlimit_set(). + */ + + LZMA_FORMAT_ERROR = 7, + /**< + * \brief File format not recognized + * + * The decoder did not recognize the input as supported file + * format. This error can occur, for example, when trying to + * decode .lzma format file with lzma_stream_decoder, + * because lzma_stream_decoder accepts only the .xz format. + */ + + LZMA_OPTIONS_ERROR = 8, + /**< + * \brief Invalid or unsupported options + * + * Invalid or unsupported options, for example + * - unsupported filter(s) or filter options; or + * - reserved bits set in headers (decoder only). + * + * Rebuilding liblzma with more features enabled, or + * upgrading to a newer version of liblzma may help. + */ + + LZMA_DATA_ERROR = 9, + /**< + * \brief Data is corrupt + * + * The usage of this return value is different in encoders + * and decoders. In both encoder and decoder, the coding + * cannot continue after this error. + * + * Encoders return this if size limits of the target file + * format would be exceeded. These limits are huge, thus + * getting this error from an encoder is mostly theoretical. + * For example, the maximum compressed and uncompressed + * size of a .xz Stream is roughly 8 EiB (2^63 bytes). + * + * Decoders return this error if the input data is corrupt. + * This can mean, for example, invalid CRC32 in headers + * or invalid check of uncompressed data. + */ + + LZMA_BUF_ERROR = 10, + /**< + * \brief No progress is possible + * + * This error code is returned when the coder cannot consume + * any new input and produce any new output. The most common + * reason for this error is that the input stream being + * decoded is truncated or corrupt. + * + * This error is not fatal. Coding can be continued normally + * by providing more input and/or more output space, if + * possible. + * + * Typically the first call to lzma_code() that can do no + * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only + * the second consecutive call doing no progress will return + * LZMA_BUF_ERROR. This is intentional. + * + * With zlib, Z_BUF_ERROR may be returned even if the + * application is doing nothing wrong, so apps will need + * to handle Z_BUF_ERROR specially. The above hack + * guarantees that liblzma never returns LZMA_BUF_ERROR + * to properly written applications unless the input file + * is truncated or corrupt. This should simplify the + * applications a little. + */ + + LZMA_PROG_ERROR = 11, + /**< + * \brief Programming error + * + * This indicates that the arguments given to the function are + * invalid or the internal state of the decoder is corrupt. + * - Function arguments are invalid or the structures + * pointed by the argument pointers are invalid + * e.g. if strm->next_out has been set to NULL and + * strm->avail_out > 0 when calling lzma_code(). + * - lzma_* functions have been called in wrong order + * e.g. lzma_code() was called right after lzma_end(). + * - If errors occur randomly, the reason might be flaky + * hardware. + * + * If you think that your code is correct, this error code + * can be a sign of a bug in liblzma. See the documentation + * how to report bugs. + */ +} lzma_ret; + + +/** + * \brief The `action' argument for lzma_code() + * + * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER, + * or LZMA_FINISH, the same `action' must is used until lzma_code() returns + * LZMA_STREAM_END. Also, the amount of input (that is, strm->avail_in) must + * not be modified by the application until lzma_code() returns + * LZMA_STREAM_END. Changing the `action' or modifying the amount of input + * will make lzma_code() return LZMA_PROG_ERROR. + */ +typedef enum { + LZMA_RUN = 0, + /**< + * \brief Continue coding + * + * Encoder: Encode as much input as possible. Some internal + * buffering will probably be done (depends on the filter + * chain in use), which causes latency: the input used won't + * usually be decodeable from the output of the same + * lzma_code() call. + * + * Decoder: Decode as much input as possible and produce as + * much output as possible. + */ + + LZMA_SYNC_FLUSH = 1, + /**< + * \brief Make all the input available at output + * + * Normally the encoder introduces some latency. + * LZMA_SYNC_FLUSH forces all the buffered data to be + * available at output without resetting the internal + * state of the encoder. This way it is possible to use + * compressed stream for example for communication over + * network. + * + * Only some filters support LZMA_SYNC_FLUSH. Trying to use + * LZMA_SYNC_FLUSH with filters that don't support it will + * make lzma_code() return LZMA_OPTIONS_ERROR. For example, + * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does. + * + * Using LZMA_SYNC_FLUSH very often can dramatically reduce + * the compression ratio. With some filters (for example, + * LZMA2), fine-tuning the compression options may help + * mitigate this problem significantly (for example, + * match finder with LZMA2). + * + * Decoders don't support LZMA_SYNC_FLUSH. + */ + + LZMA_FULL_FLUSH = 2, + /**< + * \brief Finish encoding of the current Block + * + * All the input data going to the current Block must have + * been given to the encoder (the last bytes can still be + * pending in *next_in). Call lzma_code() with LZMA_FULL_FLUSH + * until it returns LZMA_STREAM_END. Then continue normally + * with LZMA_RUN or finish the Stream with LZMA_FINISH. + * + * This action is currently supported only by Stream encoder + * and easy encoder (which uses Stream encoder). If there is + * no unfinished Block, no empty Block is created. + */ + + LZMA_FULL_BARRIER = 4, + /**< + * \brief Finish encoding of the current Block + * + * This is like LZMA_FULL_FLUSH except that this doesn't + * necessarily wait until all the input has been made + * available via the output buffer. That is, lzma_code() + * might return LZMA_STREAM_END as soon as all the input + * has been consumed (avail_in == 0). + * + * LZMA_FULL_BARRIER is useful with a threaded encoder if + * one wants to split the .xz Stream into Blocks at specific + * offsets but doesn't care if the output isn't flushed + * immediately. Using LZMA_FULL_BARRIER allows keeping + * the threads busy while LZMA_FULL_FLUSH would make + * lzma_code() wait until all the threads have finished + * until more data could be passed to the encoder. + * + * With a lzma_stream initialized with the single-threaded + * lzma_stream_encoder() or lzma_easy_encoder(), + * LZMA_FULL_BARRIER is an alias for LZMA_FULL_FLUSH. + */ + + LZMA_FINISH = 3 + /**< + * \brief Finish the coding operation + * + * All the input data must have been given to the encoder + * (the last bytes can still be pending in next_in). + * Call lzma_code() with LZMA_FINISH until it returns + * LZMA_STREAM_END. Once LZMA_FINISH has been used, + * the amount of input must no longer be changed by + * the application. + * + * When decoding, using LZMA_FINISH is optional unless the + * LZMA_CONCATENATED flag was used when the decoder was + * initialized. When LZMA_CONCATENATED was not used, the only + * effect of LZMA_FINISH is that the amount of input must not + * be changed just like in the encoder. + */ +} lzma_action; + + +/** + * \brief Custom functions for memory handling + * + * A pointer to lzma_allocator may be passed via lzma_stream structure + * to liblzma, and some advanced functions take a pointer to lzma_allocator + * as a separate function argument. The library will use the functions + * specified in lzma_allocator for memory handling instead of the default + * malloc() and free(). C++ users should note that the custom memory + * handling functions must not throw exceptions. + * + * Single-threaded mode only: liblzma doesn't make an internal copy of + * lzma_allocator. Thus, it is OK to change these function pointers in + * the middle of the coding process, but obviously it must be done + * carefully to make sure that the replacement `free' can deallocate + * memory allocated by the earlier `alloc' function(s). + * + * Multithreaded mode: liblzma might internally store pointers to the + * lzma_allocator given via the lzma_stream structure. The application + * must not change the allocator pointer in lzma_stream or the contents + * of the pointed lzma_allocator structure until lzma_end() has been used + * to free the memory associated with that lzma_stream. The allocation + * functions might be called simultaneously from multiple threads, and + * thus they must be thread safe. + */ +typedef struct { + /** + * \brief Pointer to a custom memory allocation function + * + * If you don't want a custom allocator, but still want + * custom free(), set this to NULL and liblzma will use + * the standard malloc(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param nmemb Number of elements like in calloc(). liblzma + * will always set nmemb to 1, so it is safe to + * ignore nmemb in a custom allocator if you like. + * The nmemb argument exists only for + * compatibility with zlib and libbzip2. + * \param size Size of an element in bytes. + * liblzma never sets this to zero. + * + * \return Pointer to the beginning of a memory block of + * `size' bytes, or NULL if allocation fails + * for some reason. When allocation fails, functions + * of liblzma return LZMA_MEM_ERROR. + * + * The allocator should not waste time zeroing the allocated buffers. + * This is not only about speed, but also memory usage, since the + * operating system kernel doesn't necessarily allocate the requested + * memory in physical memory until it is actually used. With small + * input files, liblzma may actually need only a fraction of the + * memory that it requested for allocation. + * + * \note LZMA_MEM_ERROR is also used when the size of the + * allocation would be greater than SIZE_MAX. Thus, + * don't assume that the custom allocator must have + * returned NULL if some function from liblzma + * returns LZMA_MEM_ERROR. + */ + void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size); + + /** + * \brief Pointer to a custom memory freeing function + * + * If you don't want a custom freeing function, but still + * want a custom allocator, set this to NULL and liblzma + * will use the standard free(). + * + * \param opaque lzma_allocator.opaque (see below) + * \param ptr Pointer returned by lzma_allocator.alloc(), + * or when it is set to NULL, a pointer returned + * by the standard malloc(). + */ + void (LZMA_API_CALL *free)(void *opaque, void *ptr); + + /** + * \brief Pointer passed to .alloc() and .free() + * + * opaque is passed as the first argument to lzma_allocator.alloc() + * and lzma_allocator.free(). This intended to ease implementing + * custom memory allocation functions for use with liblzma. + * + * If you don't need this, you should set this to NULL. + */ + void *opaque; + +} lzma_allocator; + + +/** + * \brief Internal data structure + * + * The contents of this structure is not visible outside the library. + */ +typedef struct lzma_internal_s lzma_internal; + + +/** + * \brief Passing data to and from liblzma + * + * The lzma_stream structure is used for + * - passing pointers to input and output buffers to liblzma; + * - defining custom memory hander functions; and + * - holding a pointer to coder-specific internal data structures. + * + * Typical usage: + * + * - After allocating lzma_stream (on stack or with malloc()), it must be + * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details). + * + * - Initialize a coder to the lzma_stream, for example by using + * lzma_easy_encoder() or lzma_auto_decoder(). Some notes: + * - In contrast to zlib, strm->next_in and strm->next_out are + * ignored by all initialization functions, thus it is safe + * to not initialize them yet. + * - The initialization functions always set strm->total_in and + * strm->total_out to zero. + * - If the initialization function fails, no memory is left allocated + * that would require freeing with lzma_end() even if some memory was + * associated with the lzma_stream structure when the initialization + * function was called. + * + * - Use lzma_code() to do the actual work. + * + * - Once the coding has been finished, the existing lzma_stream can be + * reused. It is OK to reuse lzma_stream with different initialization + * function without calling lzma_end() first. Old allocations are + * automatically freed. + * + * - Finally, use lzma_end() to free the allocated memory. lzma_end() never + * frees the lzma_stream structure itself. + * + * Application may modify the values of total_in and total_out as it wants. + * They are updated by liblzma to match the amount of data read and + * written but aren't used for anything else except as a possible return + * values from lzma_get_progress(). + */ +typedef struct { + const uint8_t *next_in; /**< Pointer to the next input byte. */ + size_t avail_in; /**< Number of available input bytes in next_in. */ + uint64_t total_in; /**< Total number of bytes read by liblzma. */ + + uint8_t *next_out; /**< Pointer to the next output position. */ + size_t avail_out; /**< Amount of free space in next_out. */ + uint64_t total_out; /**< Total number of bytes written by liblzma. */ + + /** + * \brief Custom memory allocation functions + * + * In most cases this is NULL which makes liblzma use + * the standard malloc() and free(). + * + * \note In 5.0.x this is not a const pointer. + */ + const lzma_allocator *allocator; + + /** Internal state is not visible to applications. */ + lzma_internal *internal; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. Excluding the initialization of this structure, + * you should not touch these, because the names of these variables + * may change. + */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + void *reserved_ptr4; + uint64_t reserved_int1; + uint64_t reserved_int2; + size_t reserved_int3; + size_t reserved_int4; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + +} lzma_stream; + + +/** + * \brief Initialization for lzma_stream + * + * When you declare an instance of lzma_stream, you can immediately + * initialize it so that initialization functions know that no memory + * has been allocated yet: + * + * lzma_stream strm = LZMA_STREAM_INIT; + * + * If you need to initialize a dynamically allocated lzma_stream, you can use + * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this + * violates the C standard since NULL may have different internal + * representation than zero, but it should be portable enough in practice. + * Anyway, for maximum portability, you can use something like this: + * + * lzma_stream tmp = LZMA_STREAM_INIT; + * *strm = tmp; + */ +#define LZMA_STREAM_INIT \ + { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \ + NULL, NULL, NULL, NULL, 0, 0, 0, 0, \ + LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM } + + +/** + * \brief Encode or decode data + * + * Once the lzma_stream has been successfully initialized (e.g. with + * lzma_stream_encoder()), the actual encoding or decoding is done + * using this function. The application has to update strm->next_in, + * strm->avail_in, strm->next_out, and strm->avail_out to pass input + * to and get output from liblzma. + * + * See the description of the coder-specific initialization function to find + * out what `action' values are supported by the coder. + */ +extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Free memory allocated for the coder data structures + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * + * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other + * members of the lzma_stream structure are touched. + * + * \note zlib indicates an error if application end()s unfinished + * stream structure. liblzma doesn't do this, and assumes that + * application knows what it is doing. + */ +extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow; + + +/** + * \brief Get progress information + * + * In single-threaded mode, applications can get progress information from + * strm->total_in and strm->total_out. In multi-threaded mode this is less + * useful because a significant amount of both input and output data gets + * buffered internally by liblzma. This makes total_in and total_out give + * misleading information and also makes the progress indicator updates + * non-smooth. + * + * This function gives realistic progress information also in multi-threaded + * mode by taking into account the progress made by each thread. In + * single-threaded mode *progress_in and *progress_out are set to + * strm->total_in and strm->total_out, respectively. + */ +extern LZMA_API(void) lzma_get_progress(lzma_stream *strm, + uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow; + + +/** + * \brief Get the memory usage of decoder filter chain + * + * This function is currently supported only when *strm has been initialized + * with a function that takes a memlimit argument. With other functions, you + * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage() + * to estimate the memory requirements. + * + * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big + * the memory usage limit should have been to decode the input. Note that + * this may give misleading information if decoding .xz Streams that have + * multiple Blocks, because each Block can have different memory requirements. + * + * \return How much memory is currently allocated for the filter + * decoders. If no filter chain is currently allocated, + * some non-zero value is still returned, which is less than + * or equal to what any filter chain would indicate as its + * memory requirement. + * + * If this function isn't supported by *strm or some other error + * occurs, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the current memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return On success, the current memory usage limit is returned + * (always non-zero). On error, zero is returned. + */ +extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the memory usage limit + * + * This function is supported only when *strm has been initialized with + * a function that takes a memlimit argument. + * + * \return - LZMA_OK: New memory usage limit successfully set. + * - LZMA_MEMLIMIT_ERROR: The new limit is too small. + * The limit was not changed. + * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't + * support memory usage limit or memlimit was zero. + */ +extern LZMA_API(lzma_ret) lzma_memlimit_set( + lzma_stream *strm, uint64_t memlimit) lzma_nothrow; diff --git a/usr/include/lzma/bcj.h b/usr/include/lzma/bcj.h new file mode 100644 index 0000000..8e37538 --- /dev/null +++ b/usr/include/lzma/bcj.h @@ -0,0 +1,90 @@ +/** + * \file lzma/bcj.h + * \brief Branch/Call/Jump conversion filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* Filter IDs for lzma_filter.id */ + +#define LZMA_FILTER_X86 LZMA_VLI_C(0x04) + /**< + * Filter for x86 binaries + */ + +#define LZMA_FILTER_POWERPC LZMA_VLI_C(0x05) + /**< + * Filter for Big endian PowerPC binaries + */ + +#define LZMA_FILTER_IA64 LZMA_VLI_C(0x06) + /**< + * Filter for IA-64 (Itanium) binaries. + */ + +#define LZMA_FILTER_ARM LZMA_VLI_C(0x07) + /**< + * Filter for ARM binaries. + */ + +#define LZMA_FILTER_ARMTHUMB LZMA_VLI_C(0x08) + /**< + * Filter for ARM-Thumb binaries. + */ + +#define LZMA_FILTER_SPARC LZMA_VLI_C(0x09) + /**< + * Filter for SPARC binaries. + */ + + +/** + * \brief Options for BCJ filters + * + * The BCJ filters never change the size of the data. Specifying options + * for them is optional: if pointer to options is NULL, default value is + * used. You probably never need to specify options to BCJ filters, so just + * set the options pointer to NULL and be happy. + * + * If options with non-default values have been specified when encoding, + * the same options must also be specified when decoding. + * + * \note At the moment, none of the BCJ filters support + * LZMA_SYNC_FLUSH. If LZMA_SYNC_FLUSH is specified, + * LZMA_OPTIONS_ERROR will be returned. If there is need, + * partial support for LZMA_SYNC_FLUSH can be added in future. + * Partial means that flushing would be possible only at + * offsets that are multiple of 2, 4, or 16 depending on + * the filter, except x86 which cannot be made to support + * LZMA_SYNC_FLUSH predictably. + */ +typedef struct { + /** + * \brief Start offset for conversions + * + * This setting is useful only when the same filter is used + * _separately_ for multiple sections of the same executable file, + * and the sections contain cross-section branch/call/jump + * instructions. In that case it is beneficial to set the start + * offset of the non-first sections so that the relative addresses + * of the cross-section branch/call/jump instructions will use the + * same absolute addresses as in the first section. + * + * When the pointer to options is NULL, the default value (zero) + * is used. + */ + uint32_t start_offset; + +} lzma_options_bcj; diff --git a/usr/include/lzma/block.h b/usr/include/lzma/block.h new file mode 100644 index 0000000..7bdcfd7 --- /dev/null +++ b/usr/include/lzma/block.h @@ -0,0 +1,581 @@ +/** + * \file lzma/block.h + * \brief .xz Block handling + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Options for the Block and Block Header encoders and decoders + * + * Different Block handling functions use different parts of this structure. + * Some read some members, other functions write, and some do both. Only the + * members listed for reading need to be initialized when the specified + * functions are called. The members marked for writing will be assigned + * new values at some point either by calling the given function or by + * later calls to lzma_code(). + */ +typedef struct { + /** + * \brief Block format version + * + * To prevent API and ABI breakages when new features are needed, + * a version number is used to indicate which fields in this + * structure are in use: + * - liblzma >= 5.0.0: version = 0 is supported. + * - liblzma >= 5.1.4beta: Support for version = 1 was added, + * which adds the ignore_check field. + * + * If version is greater than one, most Block related functions + * will return LZMA_OPTIONS_ERROR (lzma_block_header_decode() works + * with any version value). + * + * Read by: + * - All functions that take pointer to lzma_block as argument, + * including lzma_block_header_decode(). + * + * Written by: + * - lzma_block_header_decode() + */ + uint32_t version; + + /** + * \brief Size of the Block Header field + * + * This is always a multiple of four. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_size() + * - lzma_block_buffer_encode() + */ + uint32_t header_size; +# define LZMA_BLOCK_HEADER_SIZE_MIN 8 +# define LZMA_BLOCK_HEADER_SIZE_MAX 1024 + + /** + * \brief Type of integrity Check + * + * The Check ID is not stored into the Block Header, thus its value + * must be provided also when decoding. + * + * Read by: + * - lzma_block_header_encode() + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_check check; + + /** + * \brief Size of the Compressed Data in bytes + * + * Encoding: If this is not LZMA_VLI_UNKNOWN, Block Header encoder + * will store this value to the Block Header. Block encoder doesn't + * care about this value, but will set it once the encoding has been + * finished. + * + * Decoding: If this is not LZMA_VLI_UNKNOWN, Block decoder will + * verify that the size of the Compressed Data field matches + * compressed_size. + * + * Usually you don't know this value when encoding in streamed mode, + * and thus cannot write this field into the Block Header. + * + * In non-streamed mode you can reserve space for this field before + * encoding the actual Block. After encoding the data, finish the + * Block by encoding the Block Header. Steps in detail: + * + * - Set compressed_size to some big enough value. If you don't know + * better, use LZMA_VLI_MAX, but remember that bigger values take + * more space in Block Header. + * + * - Call lzma_block_header_size() to see how much space you need to + * reserve for the Block Header. + * + * - Encode the Block using lzma_block_encoder() and lzma_code(). + * It sets compressed_size to the correct value. + * + * - Use lzma_block_header_encode() to encode the Block Header. + * Because space was reserved in the first step, you don't need + * to call lzma_block_header_size() anymore, because due to + * reserving, header_size has to be big enough. If it is "too big", + * lzma_block_header_encode() will add enough Header Padding to + * make Block Header to match the size specified by header_size. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_compressed_size() + * - lzma_block_unpadded_size() + * - lzma_block_total_size() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_compressed_size() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed Size in bytes + * + * This is handled very similarly to compressed_size above. + * + * uncompressed_size is needed by fewer functions than + * compressed_size. This is because uncompressed_size isn't + * needed to validate that Block stays within proper limits. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + lzma_vli uncompressed_size; + + /** + * \brief Array of filters + * + * There can be 1-4 filters. The end of the array is marked with + * .id = LZMA_VLI_UNKNOWN. + * + * Read by: + * - lzma_block_header_size() + * - lzma_block_header_encode() + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + * + * Written by: + * - lzma_block_header_decode(): Note that this does NOT free() + * the old filter options structures. All unused filters[] will + * have .id == LZMA_VLI_UNKNOWN and .options == NULL. If + * decoding fails, all filters[] are guaranteed to be + * LZMA_VLI_UNKNOWN and NULL. + * + * \note Because of the array is terminated with + * .id = LZMA_VLI_UNKNOWN, the actual array must + * have LZMA_FILTERS_MAX + 1 members or the Block + * Header decoder will overflow the buffer. + */ + lzma_filter *filters; + + /** + * \brief Raw value stored in the Check field + * + * After successful coding, the first lzma_check_size(check) bytes + * of this array contain the raw value stored in the Check field. + * + * Note that CRC32 and CRC64 are stored in little endian byte order. + * Take it into account if you display the Check values to the user. + * + * Written by: + * - lzma_block_encoder() + * - lzma_block_decoder() + * - lzma_block_buffer_encode() + * - lzma_block_buffer_decode() + */ + uint8_t raw_check[LZMA_CHECK_SIZE_MAX]; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + uint32_t reserved_int1; + uint32_t reserved_int2; + lzma_vli reserved_int3; + lzma_vli reserved_int4; + lzma_vli reserved_int5; + lzma_vli reserved_int6; + lzma_vli reserved_int7; + lzma_vli reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + + /** + * \brief A flag to Block decoder to not verify the Check field + * + * This field is supported by liblzma >= 5.1.4beta if .version >= 1. + * + * If this is set to true, the integrity check won't be calculated + * and verified. Unless you know what you are doing, you should + * leave this to false. (A reason to set this to true is when the + * file integrity is verified externally anyway and you want to + * speed up the decompression, which matters mostly when using + * SHA-256 as the integrity check.) + * + * If .version >= 1, read by: + * - lzma_block_decoder() + * - lzma_block_buffer_decode() + * + * Written by (.version is ignored): + * - lzma_block_header_decode() always sets this to false + */ + lzma_bool ignore_check; + + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + +} lzma_block; + + +/** + * \brief Decode the Block Header Size field + * + * To decode Block Header using lzma_block_header_decode(), the size of the + * Block Header has to be known and stored into lzma_block.header_size. + * The size can be calculated from the first byte of a Block using this macro. + * Note that if the first byte is 0x00, it indicates beginning of Index; use + * this macro only when the byte is not 0x00. + * + * There is no encoding macro, because Block Header encoder is enough for that. + */ +#define lzma_block_header_size_decode(b) (((uint32_t)(b) + 1) * 4) + + +/** + * \brief Calculate Block Header Size + * + * Calculate the minimum size needed for the Block Header field using the + * settings specified in the lzma_block structure. Note that it is OK to + * increase the calculated header_size value as long as it is a multiple of + * four and doesn't exceed LZMA_BLOCK_HEADER_SIZE_MAX. Increasing header_size + * just means that lzma_block_header_encode() will add Header Padding. + * + * \return - LZMA_OK: Size calculated successfully and stored to + * block->header_size. + * - LZMA_OPTIONS_ERROR: Unsupported version, filters or + * filter options. + * - LZMA_PROG_ERROR: Invalid values like compressed_size == 0. + * + * \note This doesn't check that all the options are valid i.e. this + * may return LZMA_OK even if lzma_block_header_encode() or + * lzma_block_encoder() would fail. If you want to validate the + * filter chain, consider using lzma_memlimit_encoder() which as + * a side-effect validates the filter chain. + */ +extern LZMA_API(lzma_ret) lzma_block_header_size(lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Block Header + * + * The caller must have calculated the size of the Block Header already with + * lzma_block_header_size(). If a value larger than the one calculated by + * lzma_block_header_size() is used, the Block Header will be padded to the + * specified size. + * + * \param out Beginning of the output buffer. This must be + * at least block->header_size bytes. + * \param block Block options to be encoded. + * + * \return - LZMA_OK: Encoding was successful. block->header_size + * bytes were written to output buffer. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_encode( + const lzma_block *block, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Block Header + * + * block->version should (usually) be set to the highest value supported + * by the application. If the application sets block->version to a value + * higher than supported by the current liblzma version, this function will + * downgrade block->version to the highest value supported by it. Thus one + * should check the value of block->version after calling this function if + * block->version was set to a non-zero value and the application doesn't + * otherwise know that the liblzma version being used is new enough to + * support the specified block->version. + * + * The size of the Block Header must have already been decoded with + * lzma_block_header_size_decode() macro and stored to block->header_size. + * + * The integrity check type from Stream Header must have been stored + * to block->check. + * + * block->filters must have been allocated, but they don't need to be + * initialized (possible existing filter options are not freed). + * + * \param block Destination for Block options. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() (and also free() + * if an error occurs). + * \param in Beginning of the input buffer. This must be + * at least block->header_size bytes. + * + * \return - LZMA_OK: Decoding was successful. block->header_size + * bytes were read from the input buffer. + * - LZMA_OPTIONS_ERROR: The Block Header specifies some + * unsupported options such as unsupported filters. This can + * happen also if block->version was set to a too low value + * compared to what would be required to properly represent + * the information stored in the Block Header. + * - LZMA_DATA_ERROR: Block Header is corrupt, for example, + * the CRC32 doesn't match. + * - LZMA_PROG_ERROR: Invalid arguments, for example + * block->header_size is invalid or block->filters is NULL. + */ +extern LZMA_API(lzma_ret) lzma_block_header_decode(lzma_block *block, + const lzma_allocator *allocator, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Validate and set Compressed Size according to Unpadded Size + * + * Block Header stores Compressed Size, but Index has Unpadded Size. If the + * application has already parsed the Index and is now decoding Blocks, + * it can calculate Compressed Size from Unpadded Size. This function does + * exactly that with error checking: + * + * - Compressed Size calculated from Unpadded Size must be positive integer, + * that is, Unpadded Size must be big enough that after Block Header and + * Check fields there's still at least one byte for Compressed Size. + * + * - If Compressed Size was present in Block Header, the new value + * calculated from Unpadded Size is compared against the value + * from Block Header. + * + * \note This function must be called _after_ decoding the Block Header + * field so that it can properly validate Compressed Size if it + * was present in Block Header. + * + * \return - LZMA_OK: block->compressed_size was set successfully. + * - LZMA_DATA_ERROR: unpadded_size is too small compared to + * block->header_size and lzma_check_size(block->check). + * - LZMA_PROG_ERROR: Some values are invalid. For example, + * block->header_size must be a multiple of four and + * between 8 and 1024 inclusive. + */ +extern LZMA_API(lzma_ret) lzma_block_compressed_size( + lzma_block *block, lzma_vli unpadded_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate Unpadded Size + * + * The Index field stores Unpadded Size and Uncompressed Size. The latter + * can be taken directly from the lzma_block structure after coding a Block, + * but Unpadded Size needs to be calculated from Block Header Size, + * Compressed Size, and size of the Check field. This is where this function + * is needed. + * + * \return Unpadded Size on success, or zero on error. + */ +extern LZMA_API(lzma_vli) lzma_block_unpadded_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate the total encoded size of a Block + * + * This is equivalent to lzma_block_unpadded_size() except that the returned + * value includes the size of the Block Padding field. + * + * \return On success, total encoded size of the Block. On error, + * zero is returned. + */ +extern LZMA_API(lzma_vli) lzma_block_total_size(const lzma_block *block) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Block encoder + * + * Valid actions for lzma_code() are LZMA_RUN, LZMA_SYNC_FLUSH (only if the + * filter chain supports it), and LZMA_FINISH. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_UNSUPPORTED_CHECK: block->check specifies a Check ID + * that is not supported by this buid of liblzma. Initializing + * the encoder failed. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_encoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Block decoder + * + * Valid actions for lzma_code() are LZMA_RUN and LZMA_FINISH. Using + * LZMA_FINISH is not required. It is supported only for convenience. + * + * \return - LZMA_OK: All good, continue with lzma_code(). + * - LZMA_UNSUPPORTED_CHECK: Initialization was successful, but + * the given Check ID is not supported, thus Check will be + * ignored. + * - LZMA_PROG_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_decoder( + lzma_stream *strm, lzma_block *block) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate maximum output size for single-call Block encoding + * + * This is equivalent to lzma_stream_buffer_bound() but for .xz Blocks. + * See the documentation of lzma_stream_buffer_bound(). + */ +extern LZMA_API(size_t) lzma_block_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Block encoder + * + * In contrast to the multi-call encoder initialized with + * lzma_block_encoder(), this function encodes also the Block Header. This + * is required to make it possible to write appropriate Block Header also + * in case the data isn't compressible, and different filter chain has to be + * used to encode the data in uncompressed form using uncompressed chunks + * of the LZMA2 filter. + * + * When the data isn't compressible, header_size, compressed_size, and + * uncompressed_size are set just like when the data was compressible, but + * it is possible that header_size is too small to hold the filter chain + * specified in block->filters, because that isn't necessarily the filter + * chain that was actually used to encode the data. lzma_block_unpadded_size() + * still works normally, because it doesn't read the filters array. + * + * \param block Block options: block->version, block->check, + * and block->filters must have been initialized. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_encode( + lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call uncompressed .xz Block encoder + * + * This is like lzma_block_buffer_encode() except this doesn't try to + * compress the data and instead encodes the data using LZMA2 uncompressed + * chunks. The required output buffer size can be determined with + * lzma_block_buffer_bound(). + * + * Since the data won't be compressed, this function ignores block->filters. + * This function doesn't take lzma_allocator because this function doesn't + * allocate any memory from the heap. + */ +extern LZMA_API(lzma_ret) lzma_block_uncomp_encode(lzma_block *block, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Block decoder + * + * This is single-call equivalent of lzma_block_decoder(), and requires that + * the caller has already decoded Block Header and checked its memory usage. + * + * \param block Block options just like with lzma_block_decoder(). + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_MEM_ERROR + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_block_buffer_decode( + lzma_block *block, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow; diff --git a/usr/include/lzma/check.h b/usr/include/lzma/check.h new file mode 100644 index 0000000..6a243db --- /dev/null +++ b/usr/include/lzma/check.h @@ -0,0 +1,150 @@ +/** + * \file lzma/check.h + * \brief Integrity checks + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Type of the integrity check (Check ID) + * + * The .xz format supports multiple types of checks that are calculated + * from the uncompressed data. They vary in both speed and ability to + * detect errors. + */ +typedef enum { + LZMA_CHECK_NONE = 0, + /**< + * No Check is calculated. + * + * Size of the Check field: 0 bytes + */ + + LZMA_CHECK_CRC32 = 1, + /**< + * CRC32 using the polynomial from the IEEE 802.3 standard + * + * Size of the Check field: 4 bytes + */ + + LZMA_CHECK_CRC64 = 4, + /**< + * CRC64 using the polynomial from the ECMA-182 standard + * + * Size of the Check field: 8 bytes + */ + + LZMA_CHECK_SHA256 = 10 + /**< + * SHA-256 + * + * Size of the Check field: 32 bytes + */ +} lzma_check; + + +/** + * \brief Maximum valid Check ID + * + * The .xz file format specification specifies 16 Check IDs (0-15). Some + * of them are only reserved, that is, no actual Check algorithm has been + * assigned. When decoding, liblzma still accepts unknown Check IDs for + * future compatibility. If a valid but unsupported Check ID is detected, + * liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK, + * LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK in container.h. + */ +#define LZMA_CHECK_ID_MAX 15 + + +/** + * \brief Test if the given Check ID is supported + * + * Return true if the given Check ID is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * is not in the range [0, 15]; in that case the return value is always false. + * + * You can assume that LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always + * supported (even if liblzma is built with limited features). + */ +extern LZMA_API(lzma_bool) lzma_check_is_supported(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Get the size of the Check field with the given Check ID + * + * Although not all Check IDs have a check algorithm associated, the size of + * every Check is already frozen. This function returns the size (in bytes) of + * the Check field with the specified Check ID. The values are: + * { 0, 4, 4, 4, 8, 8, 8, 16, 16, 16, 32, 32, 32, 64, 64, 64 } + * + * If the argument is not in the range [0, 15], UINT32_MAX is returned. + */ +extern LZMA_API(uint32_t) lzma_check_size(lzma_check check) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Maximum size of a Check field + */ +#define LZMA_CHECK_SIZE_MAX 64 + + +/** + * \brief Calculate CRC32 + * + * Calculate CRC32 using the polynomial from the IEEE 802.3 standard. + * + * \param buf Pointer to the input buffer + * \param size Size of the input buffer + * \param crc Previously returned CRC value. This is used to + * calculate the CRC of a big buffer in smaller chunks. + * Set to zero when starting a new calculation. + * + * \return Updated CRC value, which can be passed to this function + * again to continue CRC calculation. + */ +extern LZMA_API(uint32_t) lzma_crc32( + const uint8_t *buf, size_t size, uint32_t crc) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate CRC64 + * + * Calculate CRC64 using the polynomial from the ECMA-182 standard. + * + * This function is used similarly to lzma_crc32(). See its documentation. + */ +extern LZMA_API(uint64_t) lzma_crc64( + const uint8_t *buf, size_t size, uint64_t crc) + lzma_nothrow lzma_attr_pure; + + +/* + * SHA-256 functions are currently not exported to public API. + * Contact Lasse Collin if you think it should be. + */ + + +/** + * \brief Get the type of the integrity check + * + * This function can be called only immediately after lzma_code() has + * returned LZMA_NO_CHECK, LZMA_UNSUPPORTED_CHECK, or LZMA_GET_CHECK. + * Calling this function in any other situation has undefined behavior. + */ +extern LZMA_API(lzma_check) lzma_get_check(const lzma_stream *strm) + lzma_nothrow; diff --git a/usr/include/lzma/container.h b/usr/include/lzma/container.h new file mode 100644 index 0000000..86991ad --- /dev/null +++ b/usr/include/lzma/container.h @@ -0,0 +1,619 @@ +/** + * \file lzma/container.h + * \brief File formats + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/************ + * Encoding * + ************/ + +/** + * \brief Default compression preset + * + * It's not straightforward to recommend a default preset, because in some + * cases keeping the resource usage relatively low is more important that + * getting the maximum compression ratio. + */ +#define LZMA_PRESET_DEFAULT UINT32_C(6) + + +/** + * \brief Mask for preset level + * + * This is useful only if you need to extract the level from the preset + * variable. That should be rare. + */ +#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F) + + +/* + * Preset flags + * + * Currently only one flag is defined. + */ + +/** + * \brief Extreme compression preset + * + * This flag modifies the preset to make the encoding significantly slower + * while improving the compression ratio only marginally. This is useful + * when you don't mind wasting time to get as small result as possible. + * + * This flag doesn't affect the memory usage requirements of the decoder (at + * least not significantly). The memory usage of the encoder may be increased + * a little but only at the lowest preset levels (0-3). + */ +#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31) + + +/** + * \brief Multithreading options + */ +typedef struct { + /** + * \brief Flags + * + * Set this to zero if no flags are wanted. + * + * No flags are currently supported. + */ + uint32_t flags; + + /** + * \brief Number of worker threads to use + */ + uint32_t threads; + + /** + * \brief Maximum uncompressed size of a Block + * + * The encoder will start a new .xz Block every block_size bytes. + * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code() + * the caller may tell liblzma to start a new Block earlier. + * + * With LZMA2, a recommended block size is 2-4 times the LZMA2 + * dictionary size. With very small dictionaries, it is recommended + * to use at least 1 MiB block size for good compression ratio, even + * if this is more than four times the dictionary size. Note that + * these are only recommendations for typical use cases; feel free + * to use other values. Just keep in mind that using a block size + * less than the LZMA2 dictionary size is waste of RAM. + * + * Set this to 0 to let liblzma choose the block size depending + * on the compression options. For LZMA2 it will be 3*dict_size + * or 1 MiB, whichever is more. + * + * For each thread, about 3 * block_size bytes of memory will be + * allocated. This may change in later liblzma versions. If so, + * the memory usage will probably be reduced, not increased. + */ + uint64_t block_size; + + /** + * \brief Timeout to allow lzma_code() to return early + * + * Multithreading can make liblzma to consume input and produce + * output in a very bursty way: it may first read a lot of input + * to fill internal buffers, then no input or output occurs for + * a while. + * + * In single-threaded mode, lzma_code() won't return until it has + * either consumed all the input or filled the output buffer. If + * this is done in multithreaded mode, it may cause a call + * lzma_code() to take even tens of seconds, which isn't acceptable + * in all applications. + * + * To avoid very long blocking times in lzma_code(), a timeout + * (in milliseconds) may be set here. If lzma_code() would block + * longer than this number of milliseconds, it will return with + * LZMA_OK. Reasonable values are 100 ms or more. The xz command + * line tool uses 300 ms. + * + * If long blocking times are fine for you, set timeout to a special + * value of 0, which will disable the timeout mechanism and will make + * lzma_code() block until all the input is consumed or the output + * buffer has been filled. + * + * \note Even with a timeout, lzma_code() might sometimes take + * somewhat long time to return. No timing guarantees + * are made. + */ + uint32_t timeout; + + /** + * \brief Compression preset (level and possible flags) + * + * The preset is set just like with lzma_easy_encoder(). + * The preset is ignored if filters below is non-NULL. + */ + uint32_t preset; + + /** + * \brief Filter chain (alternative to a preset) + * + * If this is NULL, the preset above is used. Otherwise the preset + * is ignored and the filter chain specified here is used. + */ + const lzma_filter *filters; + + /** + * \brief Integrity check type + * + * See check.h for available checks. The xz command line tool + * defaults to LZMA_CHECK_CRC64, which is a good choice if you + * are unsure. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + uint64_t reserved_int5; + uint64_t reserved_int6; + uint64_t reserved_int7; + uint64_t reserved_int8; + void *reserved_ptr1; + void *reserved_ptr2; + void *reserved_ptr3; + void *reserved_ptr4; + +} lzma_mt; + + +/** + * \brief Calculate approximate memory usage of easy encoder + * + * This function is a wrapper for lzma_raw_encoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + * + * \return Number of bytes of memory required for the given + * preset when encoding. If an error occurs, for example + * due to unsupported preset, UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate decoder memory usage of a preset + * + * This function is a wrapper for lzma_raw_decoder_memusage(). + * + * \param preset Compression preset (level and possible flags) + * + * \return Number of bytes of memory required to decompress a file + * that was compressed using the given preset. If an error + * occurs, for example due to unsupported preset, UINT64_MAX + * is returned. + */ +extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize .xz Stream encoder using a preset number + * + * This function is intended for those who just want to use the basic features + * if liblzma (that is, most developers out there). + * + * \param strm Pointer to lzma_stream that is at least initialized + * with LZMA_STREAM_INIT. + * \param preset Compression preset to use. A preset consist of level + * number and zero or more flags. Usually flags aren't + * used, so preset is simply a number [0, 9] which match + * the options -0 ... -9 of the xz command line tool. + * Additional flags can be be set using bitwise-or with + * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME. + * \param check Integrity check type to use. See check.h for available + * checks. The xz command line tool defaults to + * LZMA_CHECK_CRC64, which is a good choice if you are + * unsure. LZMA_CHECK_CRC32 is good too as long as the + * uncompressed file is not many gigabytes. + * + * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to + * encode your data. + * - LZMA_MEM_ERROR: Memory allocation failed. + * - LZMA_OPTIONS_ERROR: The given compression preset is not + * supported by this build of liblzma. + * - LZMA_UNSUPPORTED_CHECK: The given check type is not + * supported by this liblzma build. + * - LZMA_PROG_ERROR: One or more of the parameters have values + * that will never be valid. For example, strm == NULL. + * + * If initialization fails (return value is not LZMA_OK), all the memory + * allocated for *strm by liblzma is always freed. Thus, there is no need + * to call lzma_end() after failed initialization. + * + * If initialization succeeds, use lzma_code() to do the actual encoding. + * Valid values for `action' (the second argument of lzma_code()) are + * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In future, + * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH. + */ +extern LZMA_API(lzma_ret) lzma_easy_encoder( + lzma_stream *strm, uint32_t preset, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream encoding using a preset number + * + * The maximum required output buffer size can be calculated with + * lzma_stream_buffer_bound(). + * + * \param preset Compression preset to use. See the description + * in lzma_easy_encoder(). + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_easy_buffer_encode( + uint32_t preset, lzma_check check, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Initialize .xz Stream encoder using a custom filter chain + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for + * more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm, + const lzma_filter *filters, lzma_check check) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate approximate memory usage of multithreaded .xz encoder + * + * Since doing the encoding in threaded mode doesn't affect the memory + * requirements of single-threaded decompressor, you can use + * lzma_easy_decoder_memusage(options->preset) or + * lzma_raw_decoder_memusage(options->filters) to calculate + * the decompressor memory requirements. + * + * \param options Compression options + * + * \return Number of bytes of memory required for encoding with the + * given options. If an error occurs, for example due to + * unsupported preset or filter chain, UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage( + const lzma_mt *options) lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize multithreaded .xz Stream encoder + * + * This provides the functionality of lzma_easy_encoder() and + * lzma_stream_encoder() as a single function for multithreaded use. + * + * The supported actions for lzma_code() are LZMA_RUN, LZMA_FULL_FLUSH, + * LZMA_FULL_BARRIER, and LZMA_FINISH. Support for LZMA_SYNC_FLUSH might be + * added in the future. + * + * \param strm Pointer to properly prepared lzma_stream + * \param options Pointer to multithreaded compression options + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_encoder_mt( + lzma_stream *strm, const lzma_mt *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma encoder (legacy file format) + * + * The .lzma format is sometimes called the LZMA_Alone format, which is the + * reason for the name of this function. The .lzma format supports only the + * LZMA1 filter. There is no support for integrity checks like CRC32. + * + * Use this function if and only if you need to create files readable by + * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format + * is strongly recommended. + * + * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * No kind of flushing is supported, because the file format doesn't make + * it possible. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_encoder( + lzma_stream *strm, const lzma_options_lzma *options) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Calculate output buffer size for single-call Stream encoder + * + * When trying to compress uncompressible data, the encoded size will be + * slightly bigger than the input data. This function calculates how much + * output buffer space is required to be sure that lzma_stream_buffer_encode() + * doesn't return LZMA_BUF_ERROR. + * + * The calculated value is not exact, but it is guaranteed to be big enough. + * The actual maximum output space required may be slightly smaller (up to + * about 100 bytes). This should not be a problem in practice. + * + * If the calculated maximum size doesn't fit into size_t or would make the + * Stream grow past LZMA_VLI_MAX (which should never happen in practice), + * zero is returned to indicate the error. + * + * \note The limit calculated by this function applies only to + * single-call encoding. Multi-call encoding may (and probably + * will) have larger maximum expansion when encoding + * uncompressible data. Currently there is no function to + * calculate the maximum expansion of multi-call encoding. + */ +extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size) + lzma_nothrow; + + +/** + * \brief Single-call .xz Stream encoder + * + * \param filters Array of filters. This must be terminated with + * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h + * for more information. + * \param check Type of the integrity check to calculate from + * uncompressed data. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_UNSUPPORTED_CHECK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_encode( + lzma_filter *filters, lzma_check check, + const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/************ + * Decoding * + ************/ + +/** + * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream + * being decoded has no integrity check. Note that when used with + * lzma_auto_decoder(), all .lzma files will trigger LZMA_NO_CHECK + * if LZMA_TELL_NO_CHECK is used. + */ +#define LZMA_TELL_NO_CHECK UINT32_C(0x01) + + +/** + * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input + * stream has an integrity check, but the type of the integrity check is not + * supported by this liblzma version or build. Such files can still be + * decoded, but the integrity check cannot be verified. + */ +#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02) + + +/** + * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type + * of the integrity check is known. The type can then be got with + * lzma_get_check(). + */ +#define LZMA_TELL_ANY_CHECK UINT32_C(0x04) + + +/** + * This flag makes lzma_code() not calculate and verify the integrity check + * of the compressed data in .xz files. This means that invalid integrity + * check values won't be detected and LZMA_DATA_ERROR won't be returned in + * such cases. + * + * This flag only affects the checks of the compressed data itself; the CRC32 + * values in the .xz headers will still be verified normally. + * + * Don't use this flag unless you know what you are doing. Possible reasons + * to use this flag: + * + * - Trying to recover data from a corrupt .xz file. + * + * - Speeding up decompression, which matters mostly with SHA-256 + * or with files that have compressed extremely well. It's recommended + * to not use this flag for this purpose unless the file integrity is + * verified externally in some other way. + * + * Support for this flag was added in liblzma 5.1.4beta. + */ +#define LZMA_IGNORE_CHECK UINT32_C(0x10) + + +/** + * This flag enables decoding of concatenated files with file formats that + * allow concatenating compressed files as is. From the formats currently + * supported by liblzma, only the .xz format allows concatenated files. + * Concatenated files are not allowed with the legacy .lzma format. + * + * This flag also affects the usage of the `action' argument for lzma_code(). + * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END + * unless LZMA_FINISH is used as `action'. Thus, the application has to set + * LZMA_FINISH in the same way as it does when encoding. + * + * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH + * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required. + */ +#define LZMA_CONCATENATED UINT32_C(0x08) + + +/** + * \brief Initialize .xz Stream decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode .xz Streams and .lzma files with autodetection + * + * This decoder autodetects between the .xz and .lzma file formats, and + * calls lzma_stream_decoder() or lzma_alone_decoder() once the type + * of the input file has been detected. + * + * \param strm Pointer to properly prepared lzma_stream + * \param memlimit Memory usage limit as bytes. Use UINT64_MAX + * to effectively disable the limiter. + * \param flags Bitwise-or of flags, or zero for no flags. + * + * \return - LZMA_OK: Initialization was successful. + * - LZMA_MEM_ERROR: Cannot allocate memory. + * - LZMA_OPTIONS_ERROR: Unsupported flags + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_auto_decoder( + lzma_stream *strm, uint64_t memlimit, uint32_t flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .lzma decoder (legacy file format) + * + * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH. + * There is no need to use LZMA_FINISH, but allowing it may simplify + * certain types of applications. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_alone_decoder( + lzma_stream *strm, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Stream decoder + * + * \param memlimit Pointer to how much memory the decoder is allowed + * to allocate. The value pointed by this pointer is + * modified if and only if LZMA_MEMLIMIT_ERROR is + * returned. + * \param flags Bitwise-or of zero or more of the decoder flags: + * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK, + * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK + * is not allowed and will return LZMA_PROG_ERROR. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if decoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_DATA_ERROR + * - LZMA_NO_CHECK: This can be returned only if using + * the LZMA_TELL_NO_CHECK flag. + * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using + * the LZMA_TELL_UNSUPPORTED_CHECK flag. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_BUF_ERROR: Output buffer was too small. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_stream_buffer_decode( + uint64_t *memlimit, uint32_t flags, + const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/usr/include/lzma/delta.h b/usr/include/lzma/delta.h new file mode 100644 index 0000000..592fc4f --- /dev/null +++ b/usr/include/lzma/delta.h @@ -0,0 +1,77 @@ +/** + * \file lzma/delta.h + * \brief Delta filter + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Filter ID + * + * Filter ID of the Delta filter. This is used as lzma_filter.id. + */ +#define LZMA_FILTER_DELTA LZMA_VLI_C(0x03) + + +/** + * \brief Type of the delta calculation + * + * Currently only byte-wise delta is supported. Other possible types could + * be, for example, delta of 16/32/64-bit little/big endian integers, but + * these are not currently planned since byte-wise delta is almost as good. + */ +typedef enum { + LZMA_DELTA_TYPE_BYTE +} lzma_delta_type; + + +/** + * \brief Options for the Delta filter + * + * These options are needed by both encoder and decoder. + */ +typedef struct { + /** For now, this must always be LZMA_DELTA_TYPE_BYTE. */ + lzma_delta_type type; + + /** + * \brief Delta distance + * + * With the only currently supported type, LZMA_DELTA_TYPE_BYTE, + * the distance is as bytes. + * + * Examples: + * - 16-bit stereo audio: distance = 4 bytes + * - 24-bit RGB image data: distance = 3 bytes + */ + uint32_t dist; +# define LZMA_DELTA_DIST_MIN 1 +# define LZMA_DELTA_DIST_MAX 256 + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * when type is LZMA_DELTA_TYPE_BYTE, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_delta; diff --git a/usr/include/lzma/filter.h b/usr/include/lzma/filter.h new file mode 100644 index 0000000..4e78752 --- /dev/null +++ b/usr/include/lzma/filter.h @@ -0,0 +1,425 @@ +/** + * \file lzma/filter.h + * \brief Common filter related types and functions + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum number of filters in a chain + * + * A filter chain can have 1-4 filters, of which three are allowed to change + * the size of the data. Usually only one or two filters are needed. + */ +#define LZMA_FILTERS_MAX 4 + + +/** + * \brief Filter options + * + * This structure is used to pass Filter ID and a pointer filter's + * options to liblzma. A few functions work with a single lzma_filter + * structure, while most functions expect a filter chain. + * + * A filter chain is indicated with an array of lzma_filter structures. + * The array is terminated with .id = LZMA_VLI_UNKNOWN. Thus, the filter + * array must have LZMA_FILTERS_MAX + 1 elements (that is, five) to + * be able to hold any arbitrary filter chain. This is important when + * using lzma_block_header_decode() from block.h, because too small + * array would make liblzma write past the end of the filters array. + */ +typedef struct { + /** + * \brief Filter ID + * + * Use constants whose name begin with `LZMA_FILTER_' to specify + * different filters. In an array of lzma_filter structures, use + * LZMA_VLI_UNKNOWN to indicate end of filters. + * + * \note This is not an enum, because on some systems enums + * cannot be 64-bit. + */ + lzma_vli id; + + /** + * \brief Pointer to filter-specific options structure + * + * If the filter doesn't need options, set this to NULL. If id is + * set to LZMA_VLI_UNKNOWN, options is ignored, and thus + * doesn't need be initialized. + */ + void *options; + +} lzma_filter; + + +/** + * \brief Test if the given Filter ID is supported for encoding + * + * Return true if the give Filter ID is supported for encoding by this + * liblzma build. Otherwise false is returned. + * + * There is no way to list which filters are available in this particular + * liblzma version and build. It would be useless, because the application + * couldn't know what kind of options the filter would need. + */ +extern LZMA_API(lzma_bool) lzma_filter_encoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Test if the given Filter ID is supported for decoding + * + * Return true if the give Filter ID is supported for decoding by this + * liblzma build. Otherwise false is returned. + */ +extern LZMA_API(lzma_bool) lzma_filter_decoder_is_supported(lzma_vli id) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Copy the filters array + * + * Copy the Filter IDs and filter-specific options from src to dest. + * Up to LZMA_FILTERS_MAX filters are copied, plus the terminating + * .id == LZMA_VLI_UNKNOWN. Thus, dest should have at least + * LZMA_FILTERS_MAX + 1 elements space unless the caller knows that + * src is smaller than that. + * + * Unless the filter-specific options is NULL, the Filter ID has to be + * supported by liblzma, because liblzma needs to know the size of every + * filter-specific options structure. The filter-specific options are not + * validated. If options is NULL, any unsupported Filter IDs are copied + * without returning an error. + * + * Old filter-specific options in dest are not freed, so dest doesn't + * need to be initialized by the caller in any way. + * + * If an error occurs, memory possibly already allocated by this function + * is always freed. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR: Unsupported Filter ID and its options + * is not NULL. + * - LZMA_PROG_ERROR: src or dest is NULL. + */ +extern LZMA_API(lzma_ret) lzma_filters_copy( + const lzma_filter *src, lzma_filter *dest, + const lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Calculate approximate memory requirements for raw encoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream encoders too because Block and Stream encoders don't + * need significantly more memory than raw encoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when encoding. If an error occurs, + * for example due to unsupported filter chain, + * UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_raw_encoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Calculate approximate memory requirements for raw decoder + * + * This function can be used to calculate the memory requirements for + * Block and Stream decoders too because Block and Stream decoders don't + * need significantly more memory than raw decoder. + * + * \param filters Array of filters terminated with + * .id == LZMA_VLI_UNKNOWN. + * + * \return Number of bytes of memory required for the given + * filter chain when decoding. If an error occurs, + * for example due to unsupported filter chain, + * UINT64_MAX is returned. + */ +extern LZMA_API(uint64_t) lzma_raw_decoder_memusage(const lzma_filter *filters) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize raw encoder + * + * This function may be useful when implementing custom file formats. + * + * \param strm Pointer to properly prepared lzma_stream + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * + * The `action' with lzma_code() can be LZMA_RUN, LZMA_SYNC_FLUSH (if the + * filter chain supports it), or LZMA_FINISH. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_encoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize raw decoder + * + * The initialization of raw decoder goes similarly to raw encoder. + * + * The `action' with lzma_code() can be LZMA_RUN or LZMA_FINISH. Using + * LZMA_FINISH is not required, it is supported just for convenience. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_raw_decoder( + lzma_stream *strm, const lzma_filter *filters) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Update the filter chain in the encoder + * + * This function is for advanced users only. This function has two slightly + * different purposes: + * + * - After LZMA_FULL_FLUSH when using Stream encoder: Set a new filter + * chain, which will be used starting from the next Block. + * + * - After LZMA_SYNC_FLUSH using Raw, Block, or Stream encoder: Change + * the filter-specific options in the middle of encoding. The actual + * filters in the chain (Filter IDs) cannot be changed. In the future, + * it might become possible to change the filter options without + * using LZMA_SYNC_FLUSH. + * + * While rarely useful, this function may be called also when no data has + * been compressed yet. In that case, this function will behave as if + * LZMA_FULL_FLUSH (Stream encoder) or LZMA_SYNC_FLUSH (Raw or Block + * encoder) had been used right before calling this function. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filters_update( + lzma_stream *strm, const lzma_filter *filters) lzma_nothrow; + + +/** + * \brief Single-call raw encoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_size Size of the input buffer + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Not enough output buffer space. + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + * + * \note There is no function to calculate how big output buffer + * would surely be big enough. (lzma_stream_buffer_bound() + * works only for lzma_stream_buffer_encode(); raw encoder + * won't necessarily meet that bound.) + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_encode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t in_size, uint8_t *out, + size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call raw decoder + * + * \param filters Array of lzma_filter structures. The end of the + * array must be marked with .id = LZMA_VLI_UNKNOWN. + * \param allocator lzma_allocator for custom allocator functions. + * Set to NULL to use malloc() and free(). + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + */ +extern LZMA_API(lzma_ret) lzma_raw_buffer_decode( + const lzma_filter *filters, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Get the size of the Filter Properties field + * + * This function may be useful when implementing custom file formats + * using the raw encoder and decoder. + * + * \param size Pointer to uint32_t to hold the size of the properties + * \param filter Filter ID and options (the size of the properties may + * vary depending on the options) + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note This function validates the Filter ID, but does not + * necessarily validate the options. Thus, it is possible + * that this returns LZMA_OK while the following call to + * lzma_properties_encode() returns LZMA_OPTIONS_ERROR. + */ +extern LZMA_API(lzma_ret) lzma_properties_size( + uint32_t *size, const lzma_filter *filter) lzma_nothrow; + + +/** + * \brief Encode the Filter Properties field + * + * \param filter Filter ID and options + * \param props Buffer to hold the encoded options. The size of + * buffer must have been already determined with + * lzma_properties_size(). + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_PROG_ERROR + * + * \note Even this function won't validate more options than actually + * necessary. Thus, it is possible that encoding the properties + * succeeds but using the same options to initialize the encoder + * will fail. + * + * \note If lzma_properties_size() indicated that the size + * of the Filter Properties field is zero, calling + * lzma_properties_encode() is not required, but it + * won't do any harm either. + */ +extern LZMA_API(lzma_ret) lzma_properties_encode( + const lzma_filter *filter, uint8_t *props) lzma_nothrow; + + +/** + * \brief Decode the Filter Properties field + * + * \param filter filter->id must have been set to the correct + * Filter ID. filter->options doesn't need to be + * initialized (it's not freed by this function). The + * decoded options will be stored to filter->options. + * filter->options is set to NULL if there are no + * properties or if an error occurs. + * \param allocator Custom memory allocator used to allocate the + * options. Set to NULL to use the default malloc(), + * and in case of an error, also free(). + * \param props Input buffer containing the properties. + * \param props_size Size of the properties. This must be the exact + * size; giving too much or too little input will + * return LZMA_OPTIONS_ERROR. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + */ +extern LZMA_API(lzma_ret) lzma_properties_decode( + lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *props, size_t props_size) lzma_nothrow; + + +/** + * \brief Calculate encoded size of a Filter Flags field + * + * Knowing the size of Filter Flags is useful to know when allocating + * memory to hold the encoded Filter Flags. + * + * \param size Pointer to integer to hold the calculated size + * \param filter Filter ID and associated options whose encoded + * size is to be calculated + * + * \return - LZMA_OK: *size set successfully. Note that this doesn't + * guarantee that filter->options is valid, thus + * lzma_filter_flags_encode() may still fail. + * - LZMA_OPTIONS_ERROR: Unknown Filter ID or unsupported options. + * - LZMA_PROG_ERROR: Invalid options + * + * \note If you need to calculate size of List of Filter Flags, + * you need to loop over every lzma_filter entry. + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_size( + uint32_t *size, const lzma_filter *filter) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Filter Flags into given buffer + * + * In contrast to some functions, this doesn't allocate the needed buffer. + * This is due to how this function is used internally by liblzma. + * + * \param filter Filter ID and options to be encoded + * \param out Beginning of the output buffer + * \param out_pos out[*out_pos] is the next write position. This + * is updated by the encoder. + * \param out_size out[out_size] is the first byte to not write. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: Invalid or unsupported options. + * - LZMA_PROG_ERROR: Invalid options or not enough output + * buffer space (you should have checked it with + * lzma_filter_flags_size()). + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_encode(const lzma_filter *filter, + uint8_t *out, size_t *out_pos, size_t out_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Filter Flags from given buffer + * + * The decoded result is stored into *filter. The old value of + * filter->options is not free()d. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_filter_flags_decode( + lzma_filter *filter, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; diff --git a/usr/include/lzma/hardware.h b/usr/include/lzma/hardware.h new file mode 100644 index 0000000..5321d9a --- /dev/null +++ b/usr/include/lzma/hardware.h @@ -0,0 +1,64 @@ +/** + * \file lzma/hardware.h + * \brief Hardware information + * + * Since liblzma can consume a lot of system resources, it also provides + * ways to limit the resource usage. Applications linking against liblzma + * need to do the actual decisions how much resources to let liblzma to use. + * To ease making these decisions, liblzma provides functions to find out + * the relevant capabilities of the underlaying hardware. Currently there + * is only a function to find out the amount of RAM, but in the future there + * will be also a function to detect how many concurrent threads the system + * can run. + * + * \note On some operating systems, these function may temporarily + * load a shared library or open file descriptor(s) to find out + * the requested hardware information. Unless the application + * assumes that specific file descriptors are not touched by + * other threads, this should have no effect on thread safety. + * Possible operations involving file descriptors will restart + * the syscalls if they return EINTR. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Get the total amount of physical memory (RAM) in bytes + * + * This function may be useful when determining a reasonable memory + * usage limit for decompressing or how much memory it is OK to use + * for compressing. + * + * \return On success, the total amount of physical memory in bytes + * is returned. If the amount of RAM cannot be determined, + * zero is returned. This can happen if an error occurs + * or if there is no code in liblzma to detect the amount + * of RAM on the specific operating system. + */ +extern LZMA_API(uint64_t) lzma_physmem(void) lzma_nothrow; + + +/** + * \brief Get the number of processor cores or threads + * + * This function may be useful when determining how many threads to use. + * If the hardware supports more than one thread per CPU core, the number + * of hardware threads is returned if that information is available. + * + * \brief On success, the number of available CPU threads or cores is + * returned. If this information isn't available or an error + * occurs, zero is returned. + */ +extern LZMA_API(uint32_t) lzma_cputhreads(void) lzma_nothrow; diff --git a/usr/include/lzma/index.h b/usr/include/lzma/index.h new file mode 100644 index 0000000..dda60ec --- /dev/null +++ b/usr/include/lzma/index.h @@ -0,0 +1,682 @@ +/** + * \file lzma/index.h + * \brief Handling of .xz Index and related information + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Opaque data type to hold the Index(es) and other information + * + * lzma_index often holds just one .xz Index and possibly the Stream Flags + * of the same Stream and size of the Stream Padding field. However, + * multiple lzma_indexes can be concatenated with lzma_index_cat() and then + * there may be information about multiple Streams in the same lzma_index. + * + * Notes about thread safety: Only one thread may modify lzma_index at + * a time. All functions that take non-const pointer to lzma_index + * modify it. As long as no thread is modifying the lzma_index, getting + * information from the same lzma_index can be done from multiple threads + * at the same time with functions that take a const pointer to + * lzma_index or use lzma_index_iter. The same iterator must be used + * only by one thread at a time, of course, but there can be as many + * iterators for the same lzma_index as needed. + */ +typedef struct lzma_index_s lzma_index; + + +/** + * \brief Iterator to get information about Blocks and Streams + */ +typedef struct { + struct { + /** + * \brief Pointer to Stream Flags + * + * This is NULL if Stream Flags have not been set for + * this Stream with lzma_index_stream_flags(). + */ + const lzma_stream_flags *flags; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + + /** + * \brief Stream number in the lzma_index + * + * The first Stream is 1. + */ + lzma_vli number; + + /** + * \brief Number of Blocks in the Stream + * + * If this is zero, the block structure below has + * undefined values. + */ + lzma_vli block_count; + + /** + * \brief Compressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli compressed_offset; + + /** + * \brief Uncompressed start offset of this Stream + * + * The offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + */ + lzma_vli uncompressed_offset; + + /** + * \brief Compressed size of this Stream + * + * This includes all headers except the possible + * Stream Padding after this Stream. + */ + lzma_vli compressed_size; + + /** + * \brief Uncompressed size of this Stream + */ + lzma_vli uncompressed_size; + + /** + * \brief Size of Stream Padding after this Stream + * + * If it hasn't been set with lzma_index_stream_padding(), + * this defaults to zero. Stream Padding is always + * a multiple of four bytes. + */ + lzma_vli padding; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + } stream; + + struct { + /** + * \brief Block number in the file + * + * The first Block is 1. + */ + lzma_vli number_in_file; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the + * lzma_index (i.e. usually the beginning of the .xz file). + * Normally this is where you should seek in the .xz file + * to start decompressing this Block. + */ + lzma_vli compressed_file_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the lzma_index + * (i.e. usually the beginning of the .xz file). + * + * When doing random-access reading, it is possible that + * the target offset is not exactly at Block boundary. One + * will need to compare the target offset against + * uncompressed_file_offset or uncompressed_stream_offset, + * and possibly decode and throw away some amount of data + * before reaching the target offset. + */ + lzma_vli uncompressed_file_offset; + + /** + * \brief Block number in this Stream + * + * The first Block is 1. + */ + lzma_vli number_in_stream; + + /** + * \brief Compressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli compressed_stream_offset; + + /** + * \brief Uncompressed start offset of this Block + * + * This offset is relative to the beginning of the Stream + * containing this Block. + */ + lzma_vli uncompressed_stream_offset; + + /** + * \brief Uncompressed size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the uncompressed size. + */ + lzma_vli uncompressed_size; + + /** + * \brief Unpadded size of this Block + * + * You should pass this to the Block decoder if you will + * decode this Block. It will allow the Block decoder to + * validate the unpadded size. + */ + lzma_vli unpadded_size; + + /** + * \brief Total compressed size + * + * This includes all headers and padding in this Block. + * This is useful if you need to know how many bytes + * the Block decoder will actually read. + */ + lzma_vli total_size; + + lzma_vli reserved_vli1; + lzma_vli reserved_vli2; + lzma_vli reserved_vli3; + lzma_vli reserved_vli4; + + const void *reserved_ptr1; + const void *reserved_ptr2; + const void *reserved_ptr3; + const void *reserved_ptr4; + } block; + + /* + * Internal data which is used to store the state of the iterator. + * The exact format may vary between liblzma versions, so don't + * touch these in any way. + */ + union { + const void *p; + size_t s; + lzma_vli v; + } internal[6]; +} lzma_index_iter; + + +/** + * \brief Operation mode for lzma_index_iter_next() + */ +typedef enum { + LZMA_INDEX_ITER_ANY = 0, + /**< + * \brief Get the next Block or Stream + * + * Go to the next Block if the current Stream has at least + * one Block left. Otherwise go to the next Stream even if + * it has no Blocks. If the Stream has no Blocks + * (lzma_index_iter.stream.block_count == 0), + * lzma_index_iter.block will have undefined values. + */ + + LZMA_INDEX_ITER_STREAM = 1, + /**< + * \brief Get the next Stream + * + * Go to the next Stream even if the current Stream has + * unread Blocks left. If the next Stream has at least one + * Block, the iterator will point to the first Block. + * If there are no Blocks, lzma_index_iter.block will have + * undefined values. + */ + + LZMA_INDEX_ITER_BLOCK = 2, + /**< + * \brief Get the next Block + * + * Go to the next Block if the current Stream has at least + * one Block left. If the current Stream has no Blocks left, + * the next Stream with at least one Block is located and + * the iterator will be made to point to the first Block of + * that Stream. + */ + + LZMA_INDEX_ITER_NONEMPTY_BLOCK = 3 + /**< + * \brief Get the next non-empty Block + * + * This is like LZMA_INDEX_ITER_BLOCK except that it will + * skip Blocks whose Uncompressed Size is zero. + */ + +} lzma_index_iter_mode; + + +/** + * \brief Calculate memory usage of lzma_index + * + * On disk, the size of the Index field depends on both the number of Records + * stored and how big values the Records store (due to variable-length integer + * encoding). When the Index is kept in lzma_index structure, the memory usage + * depends only on the number of Records/Blocks stored in the Index(es), and + * in case of concatenated lzma_indexes, the number of Streams. The size in + * RAM is almost always significantly bigger than in the encoded form on disk. + * + * This function calculates an approximate amount of memory needed hold + * the given number of Streams and Blocks in lzma_index structure. This + * value may vary between CPU architectures and also between liblzma versions + * if the internal implementation is modified. + */ +extern LZMA_API(uint64_t) lzma_index_memusage( + lzma_vli streams, lzma_vli blocks) lzma_nothrow; + + +/** + * \brief Calculate the memory usage of an existing lzma_index + * + * This is a shorthand for lzma_index_memusage(lzma_index_stream_count(i), + * lzma_index_block_count(i)). + */ +extern LZMA_API(uint64_t) lzma_index_memused(const lzma_index *i) + lzma_nothrow; + + +/** + * \brief Allocate and initialize a new lzma_index structure + * + * \return On success, a pointer to an empty initialized lzma_index is + * returned. If allocation fails, NULL is returned. + */ +extern LZMA_API(lzma_index *) lzma_index_init(const lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Deallocate lzma_index + * + * If i is NULL, this does nothing. + */ +extern LZMA_API(void) lzma_index_end( + lzma_index *i, const lzma_allocator *allocator) lzma_nothrow; + + +/** + * \brief Add a new Block to lzma_index + * + * \param i Pointer to a lzma_index structure + * \param allocator Pointer to lzma_allocator, or NULL to + * use malloc() + * \param unpadded_size Unpadded Size of a Block. This can be + * calculated with lzma_block_unpadded_size() + * after encoding or decoding the Block. + * \param uncompressed_size Uncompressed Size of a Block. This can be + * taken directly from lzma_block structure + * after encoding or decoding the Block. + * + * Appending a new Block does not invalidate iterators. For example, + * if an iterator was pointing to the end of the lzma_index, after + * lzma_index_append() it is possible to read the next Block with + * an existing iterator. + * + * \return - LZMA_OK + * - LZMA_MEM_ERROR + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_append( + lzma_index *i, const lzma_allocator *allocator, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Set the Stream Flags + * + * Set the Stream Flags of the last (and typically the only) Stream + * in lzma_index. This can be useful when reading information from the + * lzma_index, because to decode Blocks, knowing the integrity check type + * is needed. + * + * The given Stream Flags are copied into internal preallocated structure + * in the lzma_index, thus the caller doesn't need to keep the *stream_flags + * available after calling this function. + * + * \return - LZMA_OK + * - LZMA_OPTIONS_ERROR: Unsupported stream_flags->version. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_flags( + lzma_index *i, const lzma_stream_flags *stream_flags) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the types of integrity Checks + * + * If lzma_index_stream_flags() is used to set the Stream Flags for + * every Stream, lzma_index_checks() can be used to get a bitmask to + * indicate which Check types have been used. It can be useful e.g. if + * showing the Check types to the user. + * + * The bitmask is 1 << check_id, e.g. CRC32 is 1 << 1 and SHA-256 is 1 << 10. + */ +extern LZMA_API(uint32_t) lzma_index_checks(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Set the amount of Stream Padding + * + * Set the amount of Stream Padding of the last (and typically the only) + * Stream in the lzma_index. This is needed when planning to do random-access + * reading within multiple concatenated Streams. + * + * By default, the amount of Stream Padding is assumed to be zero bytes. + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: The file size would grow too big. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_stream_padding( + lzma_index *i, lzma_vli stream_padding) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the number of Streams + */ +extern LZMA_API(lzma_vli) lzma_index_stream_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the number of Blocks + * + * This returns the total number of Blocks in lzma_index. To get number + * of Blocks in individual Streams, use lzma_index_iter. + */ +extern LZMA_API(lzma_vli) lzma_index_block_count(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Stream + * + * If multiple lzma_indexes have been combined, this works as if the Blocks + * were in a single Stream. This is useful if you are going to combine + * Blocks from multiple Streams into a single new Stream. + */ +extern LZMA_API(lzma_vli) lzma_index_stream_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the Blocks + * + * This doesn't include the Stream Header, Stream Footer, Stream Padding, + * or Index fields. + */ +extern LZMA_API(lzma_vli) lzma_index_total_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the total size of the file + * + * When no lzma_indexes have been combined with lzma_index_cat() and there is + * no Stream Padding, this function is identical to lzma_index_stream_size(). + * If multiple lzma_indexes have been combined, this includes also the headers + * of each separate Stream and the possible Stream Padding fields. + */ +extern LZMA_API(lzma_vli) lzma_index_file_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Get the uncompressed size of the file + */ +extern LZMA_API(lzma_vli) lzma_index_uncompressed_size(const lzma_index *i) + lzma_nothrow lzma_attr_pure; + + +/** + * \brief Initialize an iterator + * + * \param iter Pointer to a lzma_index_iter structure + * \param i lzma_index to which the iterator will be associated + * + * This function associates the iterator with the given lzma_index, and calls + * lzma_index_iter_rewind() on the iterator. + * + * This function doesn't allocate any memory, thus there is no + * lzma_index_iter_end(). The iterator is valid as long as the + * associated lzma_index is valid, that is, until lzma_index_end() or + * using it as source in lzma_index_cat(). Specifically, lzma_index doesn't + * become invalid if new Blocks are added to it with lzma_index_append() or + * if it is used as the destination in lzma_index_cat(). + * + * It is safe to make copies of an initialized lzma_index_iter, for example, + * to easily restart reading at some particular position. + */ +extern LZMA_API(void) lzma_index_iter_init( + lzma_index_iter *iter, const lzma_index *i) lzma_nothrow; + + +/** + * \brief Rewind the iterator + * + * Rewind the iterator so that next call to lzma_index_iter_next() will + * return the first Block or Stream. + */ +extern LZMA_API(void) lzma_index_iter_rewind(lzma_index_iter *iter) + lzma_nothrow; + + +/** + * \brief Get the next Block or Stream + * + * \param iter Iterator initialized with lzma_index_iter_init() + * \param mode Specify what kind of information the caller wants + * to get. See lzma_index_iter_mode for details. + * + * \return If next Block or Stream matching the mode was found, *iter + * is updated and this function returns false. If no Block or + * Stream matching the mode is found, *iter is not modified + * and this function returns true. If mode is set to an unknown + * value, *iter is not modified and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_next( + lzma_index_iter *iter, lzma_index_iter_mode mode) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Locate a Block + * + * If it is possible to seek in the .xz file, it is possible to parse + * the Index field(s) and use lzma_index_iter_locate() to do random-access + * reading with granularity of Block size. + * + * \param iter Iterator that was earlier initialized with + * lzma_index_iter_init(). + * \param target Uncompressed target offset which the caller would + * like to locate from the Stream + * + * If the target is smaller than the uncompressed size of the Stream (can be + * checked with lzma_index_uncompressed_size()): + * - Information about the Stream and Block containing the requested + * uncompressed offset is stored into *iter. + * - Internal state of the iterator is adjusted so that + * lzma_index_iter_next() can be used to read subsequent Blocks or Streams. + * - This function returns false. + * + * If target is greater than the uncompressed size of the Stream, *iter + * is not modified, and this function returns true. + */ +extern LZMA_API(lzma_bool) lzma_index_iter_locate( + lzma_index_iter *iter, lzma_vli target) lzma_nothrow; + + +/** + * \brief Concatenate lzma_indexes + * + * Concatenating lzma_indexes is useful when doing random-access reading in + * multi-Stream .xz file, or when combining multiple Streams into single + * Stream. + * + * \param dest lzma_index after which src is appended + * \param src lzma_index to be appended after dest. If this + * function succeeds, the memory allocated for src + * is freed or moved to be part of dest, and all + * iterators pointing to src will become invalid. + * \param allocator Custom memory allocator; can be NULL to use + * malloc() and free(). + * + * \return - LZMA_OK: lzma_indexes were concatenated successfully. + * src is now a dangling pointer. + * - LZMA_DATA_ERROR: *dest would grow too big. + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_cat(lzma_index *dest, lzma_index *src, + const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Duplicate lzma_index + * + * \return A copy of the lzma_index, or NULL if memory allocation failed. + */ +extern LZMA_API(lzma_index *) lzma_index_dup( + const lzma_index *i, const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index encoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i Pointer to lzma_index which should be encoded. + * + * The valid `action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * It is enough to use only one of them (you can choose freely; use LZMA_RUN + * to support liblzma versions older than 5.0.0). + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_encoder( + lzma_stream *strm, const lzma_index *i) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Initialize .xz Index decoder + * + * \param strm Pointer to properly prepared lzma_stream + * \param i The decoded Index will be made available via + * this pointer. Initially this function will + * set *i to NULL (the old value is ignored). If + * decoding succeeds (lzma_code() returns + * LZMA_STREAM_END), *i will be set to point + * to a new lzma_index, which the application + * has to later free with lzma_index_end(). + * \param memlimit How much memory the resulting lzma_index is + * allowed to require. + * + * The valid `action' values for lzma_code() are LZMA_RUN and LZMA_FINISH. + * It is enough to use only one of them (you can choose freely; use LZMA_RUN + * to support liblzma versions older than 5.0.0). + * + * \return - LZMA_OK: Initialization succeeded, continue with lzma_code(). + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_decoder( + lzma_stream *strm, lzma_index **i, uint64_t memlimit) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Single-call .xz Index encoder + * + * \param i lzma_index to be encoded + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * *out_pos is updated only if encoding succeeds. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_BUF_ERROR: Output buffer is too small. Use + * lzma_index_size() to find out how much output + * space is needed. + * - LZMA_PROG_ERROR + * + * \note This function doesn't take allocator argument since all + * the internal data is allocated on stack. + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_encode(const lzma_index *i, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Single-call .xz Index decoder + * + * \param i If decoding succeeds, *i will point to a new + * lzma_index, which the application has to + * later free with lzma_index_end(). If an error + * occurs, *i will be NULL. The old value of *i + * is always ignored and thus doesn't need to be + * initialized by the caller. + * \param memlimit Pointer to how much memory the resulting + * lzma_index is allowed to require. The value + * pointed by this pointer is modified if and only + * if LZMA_MEMLIMIT_ERROR is returned. + * \param allocator Pointer to lzma_allocator, or NULL to use malloc() + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * *in_pos is updated only if decoding succeeds. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_MEM_ERROR + * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached. + * The minimum required memlimit value was stored to *memlimit. + * - LZMA_DATA_ERROR + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_buffer_decode(lzma_index **i, + uint64_t *memlimit, const lzma_allocator *allocator, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; diff --git a/usr/include/lzma/index_hash.h b/usr/include/lzma/index_hash.h new file mode 100644 index 0000000..9287f1d --- /dev/null +++ b/usr/include/lzma/index_hash.h @@ -0,0 +1,107 @@ +/** + * \file lzma/index_hash.h + * \brief Validate Index by using a hash function + * + * Hashing makes it possible to use constant amount of memory to validate + * Index of arbitrary size. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + +/** + * \brief Opaque data type to hold the Index hash + */ +typedef struct lzma_index_hash_s lzma_index_hash; + + +/** + * \brief Allocate and initialize a new lzma_index_hash structure + * + * If index_hash is NULL, a new lzma_index_hash structure is allocated, + * initialized, and a pointer to it returned. If allocation fails, NULL + * is returned. + * + * If index_hash is non-NULL, it is reinitialized and the same pointer + * returned. In this case, return value cannot be NULL or a different + * pointer than the index_hash that was given as an argument. + */ +extern LZMA_API(lzma_index_hash *) lzma_index_hash_init( + lzma_index_hash *index_hash, const lzma_allocator *allocator) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Deallocate lzma_index_hash structure + */ +extern LZMA_API(void) lzma_index_hash_end( + lzma_index_hash *index_hash, const lzma_allocator *allocator) + lzma_nothrow; + + +/** + * \brief Add a new Record to an Index hash + * + * \param index Pointer to a lzma_index_hash structure + * \param unpadded_size Unpadded Size of a Block + * \param uncompressed_size Uncompressed Size of a Block + * + * \return - LZMA_OK + * - LZMA_DATA_ERROR: Compressed or uncompressed size of the + * Stream or size of the Index field would grow too big. + * - LZMA_PROG_ERROR: Invalid arguments or this function is being + * used when lzma_index_hash_decode() has already been used. + */ +extern LZMA_API(lzma_ret) lzma_index_hash_append(lzma_index_hash *index_hash, + lzma_vli unpadded_size, lzma_vli uncompressed_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode and validate the Index field + * + * After telling the sizes of all Blocks with lzma_index_hash_append(), + * the actual Index field is decoded with this function. Specifically, + * once decoding of the Index field has been started, no more Records + * can be added using lzma_index_hash_append(). + * + * This function doesn't use lzma_stream structure to pass the input data. + * Instead, the input buffer is specified using three arguments. This is + * because it matches better the internal APIs of liblzma. + * + * \param index_hash Pointer to a lzma_index_hash structure + * \param in Pointer to the beginning of the input buffer + * \param in_pos in[*in_pos] is the next byte to process + * \param in_size in[in_size] is the first byte not to process + * + * \return - LZMA_OK: So far good, but more input is needed. + * - LZMA_STREAM_END: Index decoded successfully and it matches + * the Records given with lzma_index_hash_append(). + * - LZMA_DATA_ERROR: Index is corrupt or doesn't match the + * information given with lzma_index_hash_append(). + * - LZMA_BUF_ERROR: Cannot progress because *in_pos >= in_size. + * - LZMA_PROG_ERROR + */ +extern LZMA_API(lzma_ret) lzma_index_hash_decode(lzma_index_hash *index_hash, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Get the size of the Index field as bytes + * + * This is needed to verify the Backward Size field in the Stream Footer. + */ +extern LZMA_API(lzma_vli) lzma_index_hash_size( + const lzma_index_hash *index_hash) + lzma_nothrow lzma_attr_pure; diff --git a/usr/include/lzma/lzma12.h b/usr/include/lzma/lzma12.h new file mode 100644 index 0000000..4e32fa3 --- /dev/null +++ b/usr/include/lzma/lzma12.h @@ -0,0 +1,420 @@ +/** + * \file lzma/lzma12.h + * \brief LZMA1 and LZMA2 filters + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief LZMA1 Filter ID + * + * LZMA1 is the very same thing as what was called just LZMA in LZMA Utils, + * 7-Zip, and LZMA SDK. It's called LZMA1 here to prevent developers from + * accidentally using LZMA when they actually want LZMA2. + * + * LZMA1 shouldn't be used for new applications unless you _really_ know + * what you are doing. LZMA2 is almost always a better choice. + */ +#define LZMA_FILTER_LZMA1 LZMA_VLI_C(0x4000000000000001) + +/** + * \brief LZMA2 Filter ID + * + * Usually you want this instead of LZMA1. Compared to LZMA1, LZMA2 adds + * support for LZMA_SYNC_FLUSH, uncompressed chunks (smaller expansion + * when trying to compress uncompressible data), possibility to change + * lc/lp/pb in the middle of encoding, and some other internal improvements. + */ +#define LZMA_FILTER_LZMA2 LZMA_VLI_C(0x21) + + +/** + * \brief Match finders + * + * Match finder has major effect on both speed and compression ratio. + * Usually hash chains are faster than binary trees. + * + * If you will use LZMA_SYNC_FLUSH often, the hash chains may be a better + * choice, because binary trees get much higher compression ratio penalty + * with LZMA_SYNC_FLUSH. + * + * The memory usage formulas are only rough estimates, which are closest to + * reality when dict_size is a power of two. The formulas are more complex + * in reality, and can also change a little between liblzma versions. Use + * lzma_raw_encoder_memusage() to get more accurate estimate of memory usage. + */ +typedef enum { + LZMA_MF_HC3 = 0x03, + /**< + * \brief Hash Chain with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 7.5 + * - dict_size > 16 MiB: dict_size * 5.5 + 64 MiB + */ + + LZMA_MF_HC4 = 0x04, + /**< + * \brief Hash Chain with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 7.5 + * - dict_size > 32 MiB: dict_size * 6.5 + */ + + LZMA_MF_BT2 = 0x12, + /**< + * \brief Binary Tree with 2-byte hashing + * + * Minimum nice_len: 2 + * + * Memory usage: dict_size * 9.5 + */ + + LZMA_MF_BT3 = 0x13, + /**< + * \brief Binary Tree with 2- and 3-byte hashing + * + * Minimum nice_len: 3 + * + * Memory usage: + * - dict_size <= 16 MiB: dict_size * 11.5 + * - dict_size > 16 MiB: dict_size * 9.5 + 64 MiB + */ + + LZMA_MF_BT4 = 0x14 + /**< + * \brief Binary Tree with 2-, 3-, and 4-byte hashing + * + * Minimum nice_len: 4 + * + * Memory usage: + * - dict_size <= 32 MiB: dict_size * 11.5 + * - dict_size > 32 MiB: dict_size * 10.5 + */ +} lzma_match_finder; + + +/** + * \brief Test if given match finder is supported + * + * Return true if the given match finder is supported by this liblzma build. + * Otherwise false is returned. It is safe to call this with a value that + * isn't listed in lzma_match_finder enumeration; the return value will be + * false. + * + * There is no way to list which match finders are available in this + * particular liblzma version and build. It would be useless, because + * a new match finder, which the application developer wasn't aware, + * could require giving additional options to the encoder that the older + * match finders don't need. + */ +extern LZMA_API(lzma_bool) lzma_mf_is_supported(lzma_match_finder match_finder) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Compression modes + * + * This selects the function used to analyze the data produced by the match + * finder. + */ +typedef enum { + LZMA_MODE_FAST = 1, + /**< + * \brief Fast compression + * + * Fast mode is usually at its best when combined with + * a hash chain match finder. + */ + + LZMA_MODE_NORMAL = 2 + /**< + * \brief Normal compression + * + * This is usually notably slower than fast mode. Use this + * together with binary tree match finders to expose the + * full potential of the LZMA1 or LZMA2 encoder. + */ +} lzma_mode; + + +/** + * \brief Test if given compression mode is supported + * + * Return true if the given compression mode is supported by this liblzma + * build. Otherwise false is returned. It is safe to call this with a value + * that isn't listed in lzma_mode enumeration; the return value will be false. + * + * There is no way to list which modes are available in this particular + * liblzma version and build. It would be useless, because a new compression + * mode, which the application developer wasn't aware, could require giving + * additional options to the encoder that the older modes don't need. + */ +extern LZMA_API(lzma_bool) lzma_mode_is_supported(lzma_mode mode) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Options specific to the LZMA1 and LZMA2 filters + * + * Since LZMA1 and LZMA2 share most of the code, it's simplest to share + * the options structure too. For encoding, all but the reserved variables + * need to be initialized unless specifically mentioned otherwise. + * lzma_lzma_preset() can be used to get a good starting point. + * + * For raw decoding, both LZMA1 and LZMA2 need dict_size, preset_dict, and + * preset_dict_size (if preset_dict != NULL). LZMA1 needs also lc, lp, and pb. + */ +typedef struct { + /** + * \brief Dictionary size in bytes + * + * Dictionary size indicates how many bytes of the recently processed + * uncompressed data is kept in memory. One method to reduce size of + * the uncompressed data is to store distance-length pairs, which + * indicate what data to repeat from the dictionary buffer. Thus, + * the bigger the dictionary, the better the compression ratio + * usually is. + * + * Maximum size of the dictionary depends on multiple things: + * - Memory usage limit + * - Available address space (not a problem on 64-bit systems) + * - Selected match finder (encoder only) + * + * Currently the maximum dictionary size for encoding is 1.5 GiB + * (i.e. (UINT32_C(1) << 30) + (UINT32_C(1) << 29)) even on 64-bit + * systems for certain match finder implementation reasons. In the + * future, there may be match finders that support bigger + * dictionaries. + * + * Decoder already supports dictionaries up to 4 GiB - 1 B (i.e. + * UINT32_MAX), so increasing the maximum dictionary size of the + * encoder won't cause problems for old decoders. + * + * Because extremely small dictionaries sizes would have unneeded + * overhead in the decoder, the minimum dictionary size is 4096 bytes. + * + * \note When decoding, too big dictionary does no other harm + * than wasting memory. + */ + uint32_t dict_size; +# define LZMA_DICT_SIZE_MIN UINT32_C(4096) +# define LZMA_DICT_SIZE_DEFAULT (UINT32_C(1) << 23) + + /** + * \brief Pointer to an initial dictionary + * + * It is possible to initialize the LZ77 history window using + * a preset dictionary. It is useful when compressing many + * similar, relatively small chunks of data independently from + * each other. The preset dictionary should contain typical + * strings that occur in the files being compressed. The most + * probable strings should be near the end of the preset dictionary. + * + * This feature should be used only in special situations. For + * now, it works correctly only with raw encoding and decoding. + * Currently none of the container formats supported by + * liblzma allow preset dictionary when decoding, thus if + * you create a .xz or .lzma file with preset dictionary, it + * cannot be decoded with the regular decoder functions. In the + * future, the .xz format will likely get support for preset + * dictionary though. + */ + const uint8_t *preset_dict; + + /** + * \brief Size of the preset dictionary + * + * Specifies the size of the preset dictionary. If the size is + * bigger than dict_size, only the last dict_size bytes are + * processed. + * + * This variable is read only when preset_dict is not NULL. + * If preset_dict is not NULL but preset_dict_size is zero, + * no preset dictionary is used (identical to only setting + * preset_dict to NULL). + */ + uint32_t preset_dict_size; + + /** + * \brief Number of literal context bits + * + * How many of the highest bits of the previous uncompressed + * eight-bit byte (also known as `literal') are taken into + * account when predicting the bits of the next literal. + * + * E.g. in typical English text, an upper-case letter is + * often followed by a lower-case letter, and a lower-case + * letter is usually followed by another lower-case letter. + * In the US-ASCII character set, the highest three bits are 010 + * for upper-case letters and 011 for lower-case letters. + * When lc is at least 3, the literal coding can take advantage of + * this property in the uncompressed data. + * + * There is a limit that applies to literal context bits and literal + * position bits together: lc + lp <= 4. Without this limit the + * decoding could become very slow, which could have security related + * results in some cases like email servers doing virus scanning. + * This limit also simplifies the internal implementation in liblzma. + * + * There may be LZMA1 streams that have lc + lp > 4 (maximum possible + * lc would be 8). It is not possible to decode such streams with + * liblzma. + */ + uint32_t lc; +# define LZMA_LCLP_MIN 0 +# define LZMA_LCLP_MAX 4 +# define LZMA_LC_DEFAULT 3 + + /** + * \brief Number of literal position bits + * + * lp affects what kind of alignment in the uncompressed data is + * assumed when encoding literals. A literal is a single 8-bit byte. + * See pb below for more information about alignment. + */ + uint32_t lp; +# define LZMA_LP_DEFAULT 0 + + /** + * \brief Number of position bits + * + * pb affects what kind of alignment in the uncompressed data is + * assumed in general. The default means four-byte alignment + * (2^ pb =2^2=4), which is often a good choice when there's + * no better guess. + * + * When the aligment is known, setting pb accordingly may reduce + * the file size a little. E.g. with text files having one-byte + * alignment (US-ASCII, ISO-8859-*, UTF-8), setting pb=0 can + * improve compression slightly. For UTF-16 text, pb=1 is a good + * choice. If the alignment is an odd number like 3 bytes, pb=0 + * might be the best choice. + * + * Even though the assumed alignment can be adjusted with pb and + * lp, LZMA1 and LZMA2 still slightly favor 16-byte alignment. + * It might be worth taking into account when designing file formats + * that are likely to be often compressed with LZMA1 or LZMA2. + */ + uint32_t pb; +# define LZMA_PB_MIN 0 +# define LZMA_PB_MAX 4 +# define LZMA_PB_DEFAULT 2 + + /** Compression mode */ + lzma_mode mode; + + /** + * \brief Nice length of a match + * + * This determines how many bytes the encoder compares from the match + * candidates when looking for the best match. Once a match of at + * least nice_len bytes long is found, the encoder stops looking for + * better candidates and encodes the match. (Naturally, if the found + * match is actually longer than nice_len, the actual length is + * encoded; it's not truncated to nice_len.) + * + * Bigger values usually increase the compression ratio and + * compression time. For most files, 32 to 128 is a good value, + * which gives very good compression ratio at good speed. + * + * The exact minimum value depends on the match finder. The maximum + * is 273, which is the maximum length of a match that LZMA1 and + * LZMA2 can encode. + */ + uint32_t nice_len; + + /** Match finder ID */ + lzma_match_finder mf; + + /** + * \brief Maximum search depth in the match finder + * + * For every input byte, match finder searches through the hash chain + * or binary tree in a loop, each iteration going one step deeper in + * the chain or tree. The searching stops if + * - a match of at least nice_len bytes long is found; + * - all match candidates from the hash chain or binary tree have + * been checked; or + * - maximum search depth is reached. + * + * Maximum search depth is needed to prevent the match finder from + * wasting too much time in case there are lots of short match + * candidates. On the other hand, stopping the search before all + * candidates have been checked can reduce compression ratio. + * + * Setting depth to zero tells liblzma to use an automatic default + * value, that depends on the selected match finder and nice_len. + * The default is in the range [4, 200] or so (it may vary between + * liblzma versions). + * + * Using a bigger depth value than the default can increase + * compression ratio in some cases. There is no strict maximum value, + * but high values (thousands or millions) should be used with care: + * the encoder could remain fast enough with typical input, but + * malicious input could cause the match finder to slow down + * dramatically, possibly creating a denial of service attack. + */ + uint32_t depth; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the names + * of these variables may change. These are and will never be used + * with the currently supported options, so it is safe to leave these + * uninitialized. + */ + uint32_t reserved_int1; + uint32_t reserved_int2; + uint32_t reserved_int3; + uint32_t reserved_int4; + uint32_t reserved_int5; + uint32_t reserved_int6; + uint32_t reserved_int7; + uint32_t reserved_int8; + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + void *reserved_ptr1; + void *reserved_ptr2; + +} lzma_options_lzma; + + +/** + * \brief Set a compression preset to lzma_options_lzma structure + * + * 0 is the fastest and 9 is the slowest. These match the switches -0 .. -9 + * of the xz command line tool. In addition, it is possible to bitwise-or + * flags to the preset. Currently only LZMA_PRESET_EXTREME is supported. + * The flags are defined in container.h, because the flags are used also + * with lzma_easy_encoder(). + * + * The preset values are subject to changes between liblzma versions. + * + * This function is available only if LZMA1 or LZMA2 encoder has been enabled + * when building liblzma. + * + * \return On success, false is returned. If the preset is not + * supported, true is returned. + */ +extern LZMA_API(lzma_bool) lzma_lzma_preset( + lzma_options_lzma *options, uint32_t preset) lzma_nothrow; diff --git a/usr/include/lzma/stream_flags.h b/usr/include/lzma/stream_flags.h new file mode 100644 index 0000000..bbdd408 --- /dev/null +++ b/usr/include/lzma/stream_flags.h @@ -0,0 +1,223 @@ +/** + * \file lzma/stream_flags.h + * \brief .xz Stream Header and Stream Footer encoder and decoder + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Size of Stream Header and Stream Footer + * + * Stream Header and Stream Footer have the same size and they are not + * going to change even if a newer version of the .xz file format is + * developed in future. + */ +#define LZMA_STREAM_HEADER_SIZE 12 + + +/** + * \brief Options for encoding/decoding Stream Header and Stream Footer + */ +typedef struct { + /** + * \brief Stream Flags format version + * + * To prevent API and ABI breakages if new features are needed in + * Stream Header or Stream Footer, a version number is used to + * indicate which fields in this structure are in use. For now, + * version must always be zero. With non-zero version, the + * lzma_stream_header_encode() and lzma_stream_footer_encode() + * will return LZMA_OPTIONS_ERROR. + * + * lzma_stream_header_decode() and lzma_stream_footer_decode() + * will always set this to the lowest value that supports all the + * features indicated by the Stream Flags field. The application + * must check that the version number set by the decoding functions + * is supported by the application. Otherwise it is possible that + * the application will decode the Stream incorrectly. + */ + uint32_t version; + + /** + * \brief Backward Size + * + * Backward Size must be a multiple of four bytes. In this Stream + * format version, Backward Size is the size of the Index field. + * + * Backward Size isn't actually part of the Stream Flags field, but + * it is convenient to include in this structure anyway. Backward + * Size is present only in the Stream Footer. There is no need to + * initialize backward_size when encoding Stream Header. + * + * lzma_stream_header_decode() always sets backward_size to + * LZMA_VLI_UNKNOWN so that it is convenient to use + * lzma_stream_flags_compare() when both Stream Header and Stream + * Footer have been decoded. + */ + lzma_vli backward_size; +# define LZMA_BACKWARD_SIZE_MIN 4 +# define LZMA_BACKWARD_SIZE_MAX (LZMA_VLI_C(1) << 34) + + /** + * \brief Check ID + * + * This indicates the type of the integrity check calculated from + * uncompressed data. + */ + lzma_check check; + + /* + * Reserved space to allow possible future extensions without + * breaking the ABI. You should not touch these, because the + * names of these variables may change. + * + * (We will never be able to use all of these since Stream Flags + * is just two bytes plus Backward Size of four bytes. But it's + * nice to have the proper types when they are needed.) + */ + lzma_reserved_enum reserved_enum1; + lzma_reserved_enum reserved_enum2; + lzma_reserved_enum reserved_enum3; + lzma_reserved_enum reserved_enum4; + lzma_bool reserved_bool1; + lzma_bool reserved_bool2; + lzma_bool reserved_bool3; + lzma_bool reserved_bool4; + lzma_bool reserved_bool5; + lzma_bool reserved_bool6; + lzma_bool reserved_bool7; + lzma_bool reserved_bool8; + uint32_t reserved_int1; + uint32_t reserved_int2; + +} lzma_stream_flags; + + +/** + * \brief Encode Stream Header + * + * \param options Stream Header options to be encoded. + * options->backward_size is ignored and doesn't + * need to be initialized. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Encode Stream Footer + * + * \param options Stream Footer options to be encoded. + * \param out Beginning of the output buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Encoding was successful. + * - LZMA_OPTIONS_ERROR: options->version is not supported by + * this liblzma version. + * - LZMA_PROG_ERROR: Invalid options. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_encode( + const lzma_stream_flags *options, uint8_t *out) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Header + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * options->backward_size is always set to LZMA_VLI_UNKNOWN. This is to + * help comparing Stream Flags from Stream Header and Stream Footer with + * lzma_stream_flags_compare(). + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Header. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the header + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in the header. + * + * \note When decoding .xz files that contain multiple Streams, it may + * make sense to print "file format not recognized" only if + * decoding of the Stream Header of the _first_ Stream gives + * LZMA_FORMAT_ERROR. If non-first Stream Header gives + * LZMA_FORMAT_ERROR, the message used for LZMA_DATA_ERROR is + * probably more appropriate. + * + * For example, Stream decoder in liblzma uses LZMA_DATA_ERROR if + * LZMA_FORMAT_ERROR is returned by lzma_stream_header_decode() + * when decoding non-first Stream. + */ +extern LZMA_API(lzma_ret) lzma_stream_header_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Decode Stream Footer + * + * \param options Target for the decoded Stream Header options. + * \param in Beginning of the input buffer of + * LZMA_STREAM_HEADER_SIZE bytes. + * + * \return - LZMA_OK: Decoding was successful. + * - LZMA_FORMAT_ERROR: Magic bytes don't match, thus the given + * buffer cannot be Stream Footer. + * - LZMA_DATA_ERROR: CRC32 doesn't match, thus the Stream Footer + * is corrupt. + * - LZMA_OPTIONS_ERROR: Unsupported options are present + * in Stream Footer. + * + * \note If Stream Header was already decoded successfully, but + * decoding Stream Footer returns LZMA_FORMAT_ERROR, the + * application should probably report some other error message + * than "file format not recognized", since the file more likely + * is corrupt (possibly truncated). Stream decoder in liblzma + * uses LZMA_DATA_ERROR in this situation. + */ +extern LZMA_API(lzma_ret) lzma_stream_footer_decode( + lzma_stream_flags *options, const uint8_t *in) + lzma_nothrow lzma_attr_warn_unused_result; + + +/** + * \brief Compare two lzma_stream_flags structures + * + * backward_size values are compared only if both are not + * LZMA_VLI_UNKNOWN. + * + * \return - LZMA_OK: Both are equal. If either had backward_size set + * to LZMA_VLI_UNKNOWN, backward_size values were not + * compared or validated. + * - LZMA_DATA_ERROR: The structures differ. + * - LZMA_OPTIONS_ERROR: version in either structure is greater + * than the maximum supported version (currently zero). + * - LZMA_PROG_ERROR: Invalid value, e.g. invalid check or + * backward_size. + */ +extern LZMA_API(lzma_ret) lzma_stream_flags_compare( + const lzma_stream_flags *a, const lzma_stream_flags *b) + lzma_nothrow lzma_attr_pure; diff --git a/usr/include/lzma/version.h b/usr/include/lzma/version.h new file mode 100644 index 0000000..8bdc7f0 --- /dev/null +++ b/usr/include/lzma/version.h @@ -0,0 +1,121 @@ +/** + * \file lzma/version.h + * \brief Version number + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/* + * Version number split into components + */ +#define LZMA_VERSION_MAJOR 5 +#define LZMA_VERSION_MINOR 2 +#define LZMA_VERSION_PATCH 2 +#define LZMA_VERSION_STABILITY LZMA_VERSION_STABILITY_STABLE + +#ifndef LZMA_VERSION_COMMIT +# define LZMA_VERSION_COMMIT "" +#endif + + +/* + * Map symbolic stability levels to integers. + */ +#define LZMA_VERSION_STABILITY_ALPHA 0 +#define LZMA_VERSION_STABILITY_BETA 1 +#define LZMA_VERSION_STABILITY_STABLE 2 + + +/** + * \brief Compile-time version number + * + * The version number is of format xyyyzzzs where + * - x = major + * - yyy = minor + * - zzz = revision + * - s indicates stability: 0 = alpha, 1 = beta, 2 = stable + * + * The same xyyyzzz triplet is never reused with different stability levels. + * For example, if 5.1.0alpha has been released, there will never be 5.1.0beta + * or 5.1.0 stable. + * + * \note The version number of liblzma has nothing to with + * the version number of Igor Pavlov's LZMA SDK. + */ +#define LZMA_VERSION (LZMA_VERSION_MAJOR * UINT32_C(10000000) \ + + LZMA_VERSION_MINOR * UINT32_C(10000) \ + + LZMA_VERSION_PATCH * UINT32_C(10) \ + + LZMA_VERSION_STABILITY) + + +/* + * Macros to construct the compile-time version string + */ +#if LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_ALPHA +# define LZMA_VERSION_STABILITY_STRING "alpha" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_BETA +# define LZMA_VERSION_STABILITY_STRING "beta" +#elif LZMA_VERSION_STABILITY == LZMA_VERSION_STABILITY_STABLE +# define LZMA_VERSION_STABILITY_STRING "" +#else +# error Incorrect LZMA_VERSION_STABILITY +#endif + +#define LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) \ + #major "." #minor "." #patch stability commit + +#define LZMA_VERSION_STRING_C(major, minor, patch, stability, commit) \ + LZMA_VERSION_STRING_C_(major, minor, patch, stability, commit) + + +/** + * \brief Compile-time version as a string + * + * This can be for example "4.999.5alpha", "4.999.8beta", or "5.0.0" (stable + * versions don't have any "stable" suffix). In future, a snapshot built + * from source code repository may include an additional suffix, for example + * "4.999.8beta-21-g1d92". The commit ID won't be available in numeric form + * in LZMA_VERSION macro. + */ +#define LZMA_VERSION_STRING LZMA_VERSION_STRING_C( \ + LZMA_VERSION_MAJOR, LZMA_VERSION_MINOR, \ + LZMA_VERSION_PATCH, LZMA_VERSION_STABILITY_STRING, \ + LZMA_VERSION_COMMIT) + + +/* #ifndef is needed for use with windres (MinGW or Cygwin). */ +#ifndef LZMA_H_INTERNAL_RC + +/** + * \brief Run-time version number as an integer + * + * Return the value of LZMA_VERSION macro at the compile time of liblzma. + * This allows the application to compare if it was built against the same, + * older, or newer version of liblzma that is currently running. + */ +extern LZMA_API(uint32_t) lzma_version_number(void) + lzma_nothrow lzma_attr_const; + + +/** + * \brief Run-time version as a string + * + * This function may be useful if you want to display which version of + * liblzma your application is currently using. + */ +extern LZMA_API(const char *) lzma_version_string(void) + lzma_nothrow lzma_attr_const; + +#endif diff --git a/usr/include/lzma/vli.h b/usr/include/lzma/vli.h new file mode 100644 index 0000000..9ad13f2 --- /dev/null +++ b/usr/include/lzma/vli.h @@ -0,0 +1,166 @@ +/** + * \file lzma/vli.h + * \brief Variable-length integer handling + * + * In the .xz format, most integers are encoded in a variable-length + * representation, which is sometimes called little endian base-128 encoding. + * This saves space when smaller values are more likely than bigger values. + * + * The encoding scheme encodes seven bits to every byte, using minimum + * number of bytes required to represent the given value. Encodings that use + * non-minimum number of bytes are invalid, thus every integer has exactly + * one encoded representation. The maximum number of bits in a VLI is 63, + * thus the vli argument must be less than or equal to UINT64_MAX / 2. You + * should use LZMA_VLI_MAX for clarity. + */ + +/* + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + * + * See ../lzma.h for information about liblzma as a whole. + */ + +#ifndef LZMA_H_INTERNAL +# error Never include this file directly. Use instead. +#endif + + +/** + * \brief Maximum supported value of a variable-length integer + */ +#define LZMA_VLI_MAX (UINT64_MAX / 2) + +/** + * \brief VLI value to denote that the value is unknown + */ +#define LZMA_VLI_UNKNOWN UINT64_MAX + +/** + * \brief Maximum supported encoded length of variable length integers + */ +#define LZMA_VLI_BYTES_MAX 9 + +/** + * \brief VLI constant suffix + */ +#define LZMA_VLI_C(n) UINT64_C(n) + + +/** + * \brief Variable-length integer type + * + * Valid VLI values are in the range [0, LZMA_VLI_MAX]. Unknown value is + * indicated with LZMA_VLI_UNKNOWN, which is the maximum value of the + * underlaying integer type. + * + * lzma_vli will be uint64_t for the foreseeable future. If a bigger size + * is needed in the future, it is guaranteed that 2 * LZMA_VLI_MAX will + * not overflow lzma_vli. This simplifies integer overflow detection. + */ +typedef uint64_t lzma_vli; + + +/** + * \brief Validate a variable-length integer + * + * This is useful to test that application has given acceptable values + * for example in the uncompressed_size and compressed_size variables. + * + * \return True if the integer is representable as VLI or if it + * indicates unknown value. + */ +#define lzma_vli_is_valid(vli) \ + ((vli) <= LZMA_VLI_MAX || (vli) == LZMA_VLI_UNKNOWN) + + +/** + * \brief Encode a variable-length integer + * + * This function has two modes: single-call and multi-call. Single-call mode + * encodes the whole integer at once; it is an error if the output buffer is + * too small. Multi-call mode saves the position in *vli_pos, and thus it is + * possible to continue encoding if the buffer becomes full before the whole + * integer has been encoded. + * + * \param vli Integer to be encoded + * \param vli_pos How many VLI-encoded bytes have already been written + * out. When starting to encode a new integer in + * multi-call mode, *vli_pos must be set to zero. + * To use single-call encoding, set vli_pos to NULL. + * \param out Beginning of the output buffer + * \param out_pos The next byte will be written to out[*out_pos]. + * \param out_size Size of the out buffer; the first byte into + * which no data is written to is out[out_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully encoded. + * - LZMA_PROG_ERROR: Arguments are not sane. This can be due + * to too little output space; single-call mode doesn't use + * LZMA_BUF_ERROR, since the application should have checked + * the encoded size with lzma_vli_size(). + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely written out yet. + * - LZMA_STREAM_END: Integer successfully encoded. + * - LZMA_BUF_ERROR: No output space was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_encode(lzma_vli vli, size_t *vli_pos, + uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow; + + +/** + * \brief Decode a variable-length integer + * + * Like lzma_vli_encode(), this function has single-call and multi-call modes. + * + * \param vli Pointer to decoded integer. The decoder will + * initialize it to zero when *vli_pos == 0, so + * application isn't required to initialize *vli. + * \param vli_pos How many bytes have already been decoded. When + * starting to decode a new integer in multi-call + * mode, *vli_pos must be initialized to zero. To + * use single-call decoding, set vli_pos to NULL. + * \param in Beginning of the input buffer + * \param in_pos The next byte will be read from in[*in_pos]. + * \param in_size Size of the input buffer; the first byte that + * won't be read is in[in_size]. + * + * \return Slightly different return values are used in multi-call and + * single-call modes. + * + * Single-call (vli_pos == NULL): + * - LZMA_OK: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. This includes hitting + * the end of the input buffer before the whole integer was + * decoded; providing no input at all will use LZMA_DATA_ERROR. + * - LZMA_PROG_ERROR: Arguments are not sane. + * + * Multi-call (vli_pos != NULL): + * - LZMA_OK: So far all OK, but the integer is not + * completely decoded yet. + * - LZMA_STREAM_END: Integer successfully decoded. + * - LZMA_DATA_ERROR: Integer is corrupt. + * - LZMA_BUF_ERROR: No input was provided. + * - LZMA_PROG_ERROR: Arguments are not sane. + */ +extern LZMA_API(lzma_ret) lzma_vli_decode(lzma_vli *vli, size_t *vli_pos, + const uint8_t *in, size_t *in_pos, size_t in_size) + lzma_nothrow; + + +/** + * \brief Get the number of bytes required to encode a VLI + * + * \return Number of bytes on success (1-9). If vli isn't valid, + * zero is returned. + */ +extern LZMA_API(uint32_t) lzma_vli_size(lzma_vli vli) + lzma_nothrow lzma_attr_pure; diff --git a/usr/share/doc/xz/AUTHORS b/usr/share/doc/xz/AUTHORS new file mode 100644 index 0000000..bda8797 --- /dev/null +++ b/usr/share/doc/xz/AUTHORS @@ -0,0 +1,27 @@ + +Authors of XZ Utils +=================== + + XZ Utils is developed and maintained by Lasse Collin + . + + Major parts of liblzma are based on code written by Igor Pavlov, + specifically the LZMA SDK . Without + this code, XZ Utils wouldn't exist. + + The SHA-256 implementation in liblzma is based on the code found from + 7-Zip , which has a modified version of the SHA-256 + code found from Crypto++ . The SHA-256 code + in Crypto++ was written by Kevin Springle and Wei Dai. + + Some scripts have been adapted from gzip. The original versions + were written by Jean-loup Gailly, Charles Levert, and Paul Eggert. + Andrew Dudman helped adapting the scripts and their man pages for + XZ Utils. + + The GNU Autotools-based build system contains files from many authors, + which I'm not trying to list here. + + Several people have contributed fixes or reported bugs. Most of them + are mentioned in the file THANKS. + diff --git a/usr/share/doc/xz/COPYING b/usr/share/doc/xz/COPYING new file mode 100644 index 0000000..43c90d0 --- /dev/null +++ b/usr/share/doc/xz/COPYING @@ -0,0 +1,65 @@ + +XZ Utils Licensing +================== + + Different licenses apply to different files in this package. Here + is a rough summary of which licenses apply to which parts of this + package (but check the individual files to be sure!): + + - liblzma is in the public domain. + + - xz, xzdec, and lzmadec command line tools are in the public + domain unless GNU getopt_long had to be compiled and linked + in from the lib directory. The getopt_long code is under + GNU LGPLv2.1+. + + - The scripts to grep, diff, and view compressed files have been + adapted from gzip. These scripts and their documentation are + under GNU GPLv2+. + + - All the documentation in the doc directory and most of the + XZ Utils specific documentation files in other directories + are in the public domain. + + - Translated messages are in the public domain. + + - The build system contains public domain files, and files that + are under GNU GPLv2+ or GNU GPLv3+. None of these files end up + in the binaries being built. + + - Test files and test code in the tests directory, and debugging + utilities in the debug directory are in the public domain. + + - The extra directory may contain public domain files, and files + that are under various free software licenses. + + You can do whatever you want with the files that have been put into + the public domain. If you find public domain legally problematic, + take the previous sentence as a license grant. If you still find + the lack of copyright legally problematic, you have too many + lawyers. + + As usual, this software is provided "as is", without any warranty. + + If you copy significant amounts of public domain code from XZ Utils + into your project, acknowledging this somewhere in your software is + polite (especially if it is proprietary, non-free software), but + naturally it is not legally required. Here is an example of a good + notice to put into "about box" or into documentation: + + This software includes code from XZ Utils . + + The following license texts are included in the following files: + - COPYING.LGPLv2.1: GNU Lesser General Public License version 2.1 + - COPYING.GPLv2: GNU General Public License version 2 + - COPYING.GPLv3: GNU General Public License version 3 + + Note that the toolchain (compiler, linker etc.) may add some code + pieces that are copyrighted. Thus, it is possible that e.g. liblzma + binary wouldn't actually be in the public domain in its entirety + even though it contains no copyrighted code from the XZ Utils source + package. + + If you have questions, don't hesitate to ask the author(s) for more + information. + diff --git a/usr/share/doc/xz/COPYING.GPLv2 b/usr/share/doc/xz/COPYING.GPLv2 new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/usr/share/doc/xz/COPYING.GPLv2 @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/usr/share/doc/xz/NEWS b/usr/share/doc/xz/NEWS new file mode 100644 index 0000000..24166e4 --- /dev/null +++ b/usr/share/doc/xz/NEWS @@ -0,0 +1,505 @@ + +XZ Utils Release Notes +====================== + +5.2.2 (2015-09-29) + + * Fixed bugs in QNX-specific code. + + * Omitted the use of pipe2() even if it is available to avoid + portability issues with some old Linux and glibc combinations. + + * Updated German translation. + + * Added project files to build static and shared liblzma (not the + whole XZ Utils) with Visual Studio 2013 update 2 or later. + + * Documented that threaded decompression hasn't been implemented + yet. A 5.2.0 NEWS entry describing multi-threading support had + incorrectly said "decompression" when it should have said + "compression". + + +5.2.1 (2015-02-26) + + * Fixed a compression-ratio regression in fast mode of LZMA1 and + LZMA2. The bug is present in 5.1.4beta and 5.2.0 releases. + + * Fixed a portability problem in xz that affected at least OpenBSD. + + * Fixed xzdiff to be compatible with FreeBSD's mktemp which differs + from most other mktemp implementations. + + * Changed CPU core count detection to use cpuset_getaffinity() on + FreeBSD. + + +5.2.0 (2014-12-21) + + Since 5.1.4beta: + + * All fixes from 5.0.8 + + * liblzma: Fixed lzma_stream_encoder_mt_memusage() when a preset + was used. + + * xzdiff: If mktemp isn't installed, mkdir will be used as + a fallback to create a temporary directory. Installing mktemp + is still recommended. + + * Updated French, German, Italian, Polish, and Vietnamese + translations. + + Summary of fixes and new features added in the 5.1.x development + releases: + + * liblzma: + + - Added support for multi-threaded compression. See the + lzma_mt structure, lzma_stream_encoder_mt(), and + lzma_stream_encoder_mt_memusage() in , + lzma_get_progress() in , and lzma_cputhreads() + in for details. + + - Made the uses of lzma_allocator const correct. + + - Added lzma_block_uncomp_encode() to create uncompressed + .xz Blocks using LZMA2 uncompressed chunks. + + - Added support for LZMA_IGNORE_CHECK. + + - A few speed optimizations were made. + + - Added support for symbol versioning. It is enabled by default + on GNU/Linux, other GNU-based systems, and FreeBSD. + + - liblzma (not the whole XZ Utils) should now be buildable + with MSVC 2013 update 2 or later using windows/config.h. + + * xz: + + - Fixed a race condition in the signal handling. It was + possible that e.g. the first SIGINT didn't make xz exit + if reading or writing blocked and one had bad luck. The fix + is non-trivial, so as of writing it is unknown if it will be + backported to the v5.0 branch. + + - Multi-threaded compression can be enabled with the + --threads (-T) option. + [Fixed: This originally said "decompression".] + + - New command line options in xz: --single-stream, + --block-size=SIZE, --block-list=SIZES, + --flush-timeout=TIMEOUT, and --ignore-check. + + - xz -lvv now shows the minimum xz version that is required to + decompress the file. Currently it is 5.0.0 for all supported + .xz files except files with empty LZMA2 streams require 5.0.2. + + * xzdiff and xzgrep now support .lzo files if lzop is installed. + The .tzo suffix is also recognized as a shorthand for .tar.lzo. + + +5.1.4beta (2014-09-14) + + * All fixes from 5.0.6 + + * liblzma: Fixed the use of presets in threaded encoder + initialization. + + * xz --block-list and --block-size can now be used together + in single-threaded mode. Previously the combination only + worked in multi-threaded mode. + + * Added support for LZMA_IGNORE_CHECK to liblzma and made it + available in xz as --ignore-check. + + * liblzma speed optimizations: + + - Initialization of a new LZMA1 or LZMA2 encoder has been + optimized. (The speed of reinitializing an already-allocated + encoder isn't affected.) This helps when compressing many + small buffers with lzma_stream_buffer_encode() and other + similar situations where an already-allocated encoder state + isn't reused. This speed-up is visible in xz too if one + compresses many small files one at a time instead running xz + once and giving all files as command-line arguments. + + - Buffer comparisons are now much faster when unaligned access + is allowed (configured with --enable-unaligned-access). This + speeds up encoding significantly. There is arch-specific code + for 32-bit and 64-bit x86 (32-bit needs SSE2 for the best + results and there's no run-time CPU detection for now). + For other archs there is only generic code which probably + isn't as optimal as arch-specific solutions could be. + + - A few speed optimizations were made to the SHA-256 code. + (Note that the builtin SHA-256 code isn't used on all + operating systems.) + + * liblzma can now be built with MSVC 2013 update 2 or later + using windows/config.h. + + * Vietnamese translation was added. + + +5.1.3alpha (2013-10-26) + + * All fixes from 5.0.5 + + * liblzma: + + - Fixed a deadlock in the threaded encoder. + + - Made the uses of lzma_allocator const correct. + + - Added lzma_block_uncomp_encode() to create uncompressed + .xz Blocks using LZMA2 uncompressed chunks. + + - Added support for native threads on Windows and the ability + to detect the number of CPU cores. + + * xz: + + - Fixed a race condition in the signal handling. It was + possible that e.g. the first SIGINT didn't make xz exit + if reading or writing blocked and one had bad luck. The fix + is non-trivial, so as of writing it is unknown if it will be + backported to the v5.0 branch. + + - Made the progress indicator work correctly in threaded mode. + + - Threaded encoder now works together with --block-list=SIZES. + + - Added preliminary support for --flush-timeout=TIMEOUT. + It can be useful for (somewhat) real-time streaming. For + now the decompression side has to be done with something + else than the xz tool due to how xz does buffering, but this + should be fixed. + + +5.1.2alpha (2012-07-04) + + * All fixes from 5.0.3 and 5.0.4 + + * liblzma: + + - Fixed a deadlock and an invalid free() in the threaded encoder. + + - Added support for symbol versioning. It is enabled by default + on GNU/Linux, other GNU-based systems, and FreeBSD. + + - Use SHA-256 implementation from the operating system if one is + available in libc, libmd, or libutil. liblzma won't use e.g. + OpenSSL or libgcrypt to avoid introducing new dependencies. + + - Fixed liblzma.pc for static linking. + + - Fixed a few portability bugs. + + * xz --decompress --single-stream now fixes the input position after + successful decompression. Now the following works: + + echo foo | xz > foo.xz + echo bar | xz >> foo.xz + ( xz -dc --single-stream ; xz -dc --single-stream ) < foo.xz + + Note that it doesn't work if the input is not seekable + or if there is Stream Padding between the concatenated + .xz Streams. + + * xz -lvv now shows the minimum xz version that is required to + decompress the file. Currently it is 5.0.0 for all supported .xz + files except files with empty LZMA2 streams require 5.0.2. + + * Added an *incomplete* implementation of --block-list=SIZES to xz. + It only works correctly in single-threaded mode and when + --block-size isn't used at the same time. --block-list allows + specifying the sizes of Blocks which can be useful e.g. when + creating files for random-access reading. + + +5.1.1alpha (2011-04-12) + + * All fixes from 5.0.2 + + * liblzma fixes that will also be included in 5.0.3: + + - A memory leak was fixed. + + - lzma_stream_buffer_encode() no longer creates an empty .xz + Block if encoding an empty buffer. Such an empty Block with + LZMA2 data would trigger a bug in 5.0.1 and older (see the + first bullet point in 5.0.2 notes). When releasing 5.0.2, + I thought that no encoder creates this kind of files but + I was wrong. + + - Validate function arguments better in a few functions. Most + importantly, specifying an unsupported integrity check to + lzma_stream_buffer_encode() no longer creates a corrupt .xz + file. Probably no application tries to do that, so this + shouldn't be a big problem in practice. + + - Document that lzma_block_buffer_encode(), + lzma_easy_buffer_encode(), lzma_stream_encoder(), and + lzma_stream_buffer_encode() may return LZMA_UNSUPPORTED_CHECK. + + - The return values of the _memusage() functions are now + documented better. + + * Support for multithreaded compression was added using the simplest + method, which splits the input data into blocks and compresses + them independently. Other methods will be added in the future. + The current method has room for improvement, e.g. it is possible + to reduce the memory usage. + + * Added the options --single-stream and --block-size=SIZE to xz. + + * xzdiff and xzgrep now support .lzo files if lzop is installed. + The .tzo suffix is also recognized as a shorthand for .tar.lzo. + + * Support for short 8.3 filenames under DOS was added to xz. It is + experimental and may change before it gets into a stable release. + + +5.0.8 (2014-12-21) + + * Fixed an old bug in xzgrep that affected OpenBSD and probably + a few other operating systems too. + + * Updated French and German translations. + + * Added support for detecting the amount of RAM on AmigaOS/AROS. + + * Minor build system updates. + + +5.0.7 (2014-09-20) + + * Fix regressions introduced in 5.0.6: + + - Fix building with non-GNU make. + + - Fix invalid Libs.private value in liblzma.pc which broke + static linking against liblzma if the linker flags were + taken from pkg-config. + + +5.0.6 (2014-09-14) + + * xzgrep now exits with status 0 if at least one file matched. + + * A few minor portability and build system fixes + + +5.0.5 (2013-06-30) + + * lzmadec and liblzma's lzma_alone_decoder(): Support decompressing + .lzma files that have less common settings in the headers + (dictionary size other than 2^n or 2^n + 2^(n-1), or uncompressed + size greater than 256 GiB). The limitations existed to avoid false + positives when detecting .lzma files. The lc + lp <= 4 limitation + still remains since liblzma's LZMA decoder has that limitation. + + NOTE: xz's .lzma support or liblzma's lzma_auto_decoder() are NOT + affected by this change. They still consider uncommon .lzma headers + as not being in the .lzma format. Changing this would give way too + many false positives. + + * xz: + + - Interaction of preset and custom filter chain options was + made less illogical. This affects only certain less typical + uses cases so few people are expected to notice this change. + + Now when a custom filter chain option (e.g. --lzma2) is + specified, all preset options (-0 ... -9, -e) earlier are on + the command line are completely forgotten. Similarly, when + a preset option is specified, all custom filter chain options + earlier on the command line are completely forgotten. + + Example 1: "xz -9 --lzma2=preset=5 -e" is equivalent to "xz -e" + which is equivalent to "xz -6e". Earlier -e didn't put xz back + into preset mode and thus the example command was equivalent + to "xz --lzma2=preset=5". + + Example 2: "xz -9e --lzma2=preset=5 -7" is equivalent to + "xz -7". Earlier a custom filter chain option didn't make + xz forget the -e option so the example was equivalent to + "xz -7e". + + - Fixes and improvements to error handling. + + - Various fixes to the man page. + + * xzless: Fixed to work with "less" versions 448 and later. + + * xzgrep: Made -h an alias for --no-filename. + + * Include the previously missing debug/translation.bash which can + be useful for translators. + + * Include a build script for Mac OS X. This has been in the Git + repository since 2010 but due to a mistake in Makefile.am the + script hasn't been included in a release tarball before. + + +5.0.4 (2012-06-22) + + * liblzma: + + - Fix lzma_index_init(). It could crash if memory allocation + failed. + + - Fix the possibility of an incorrect LZMA_BUF_ERROR when a BCJ + filter is used and the application only provides exactly as + much output space as is the uncompressed size of the file. + + - Fix a bug in doc/examples_old/xz_pipe_decompress.c. It didn't + check if the last call to lzma_code() really returned + LZMA_STREAM_END, which made the program think that truncated + files are valid. + + - New example programs in doc/examples (old programs are now in + doc/examples_old). These have more comments and more detailed + error handling. + + * Fix "xz -lvv foo.xz". It could crash on some corrupted files. + + * Fix output of "xz --robot -lv" and "xz --robot -lvv" which + incorrectly printed the filename also in the "foo (x/x)" format. + + * Fix exit status of "xzdiff foo.xz bar.xz". + + * Fix exit status of "xzgrep foo binary_file". + + * Fix portability to EBCDIC systems. + + * Fix a configure issue on AIX with the XL C compiler. See INSTALL + for details. + + * Update French, German, Italian, and Polish translations. + + +5.0.3 (2011-05-21) + + * liblzma fixes: + + - A memory leak was fixed. + + - lzma_stream_buffer_encode() no longer creates an empty .xz + Block if encoding an empty buffer. Such an empty Block with + LZMA2 data would trigger a bug in 5.0.1 and older (see the + first bullet point in 5.0.2 notes). When releasing 5.0.2, + I thought that no encoder creates this kind of files but + I was wrong. + + - Validate function arguments better in a few functions. Most + importantly, specifying an unsupported integrity check to + lzma_stream_buffer_encode() no longer creates a corrupt .xz + file. Probably no application tries to do that, so this + shouldn't be a big problem in practice. + + - Document that lzma_block_buffer_encode(), + lzma_easy_buffer_encode(), lzma_stream_encoder(), and + lzma_stream_buffer_encode() may return LZMA_UNSUPPORTED_CHECK. + + - The return values of the _memusage() functions are now + documented better. + + * Fix command name detection in xzgrep. xzegrep and xzfgrep now + correctly use egrep and fgrep instead of grep. + + * French translation was added. + + +5.0.2 (2011-04-01) + + * LZMA2 decompressor now correctly accepts LZMA2 streams with no + uncompressed data. Previously it considered them corrupt. The + bug can affect applications that use raw LZMA2 streams. It is + very unlikely to affect .xz files because no compressor creates + .xz files with empty LZMA2 streams. (Empty .xz files are a + different thing than empty LZMA2 streams.) + + * "xz --suffix=.foo filename.foo" now refuses to compress the + file due to it already having the suffix .foo. It was already + documented on the man page, but the code lacked the test. + + * "xzgrep -l foo bar.xz" works now. + + * Polish translation was added. + + +5.0.1 (2011-01-29) + + * xz --force now (de)compresses files that have setuid, setgid, + or sticky bit set and files that have multiple hard links. + The man page had it documented this way already, but the code + had a bug. + + * gzip and bzip2 support in xzdiff was fixed. + + * Portability fixes + + * Minor fix to Czech translation + + +5.0.0 (2010-10-23) + + Only the most important changes compared to 4.999.9beta are listed + here. One change is especially important: + + * The memory usage limit is now disabled by default. Some scripts + written before this change may have used --memory=max on xz command + line or in XZ_OPT. THESE USES OF --memory=max SHOULD BE REMOVED + NOW, because they interfere with user's ability to set the memory + usage limit himself. If user-specified limit causes problems to + your script, blame the user. + + Other significant changes: + + * Added support for XZ_DEFAULTS environment variable. This variable + allows users to set default options for xz, e.g. default memory + usage limit or default compression level. Scripts that use xz + must never set or unset XZ_DEFAULTS. Scripts should use XZ_OPT + instead if they need a way to pass options to xz via an + environment variable. + + * The compression settings associated with the preset levels + -0 ... -9 have been changed. --extreme was changed a little too. + It is now less likely to make compression worse, but with some + files the new --extreme may compress slightly worse than the old + --extreme. + + * If a preset level (-0 ... -9) is specified after a custom filter + chain options have been used (e.g. --lzma2), the custom filter + chain will be forgotten. Earlier the preset options were + completely ignored after custom filter chain options had been + seen. + + * xz will create sparse files when decompressing if the uncompressed + data contains long sequences of binary zeros. This is done even + when writing to standard output that is connected to a regular + file and certain additional conditions are met to make it safe. + + * Support for "xz --list" was added. Combine with --verbose or + --verbose --verbose (-vv) for detailed output. + + * I had hoped that liblzma API would have been stable after + 4.999.9beta, but there have been a couple of changes in the + advanced features, which don't affect most applications: + + - Index handling code was revised. If you were using the old + API, you will get a compiler error (so it's easy to notice). + + - A subtle but important change was made to the Block handling + API. lzma_block.version has to be initialized even for + lzma_block_header_decode(). Code that doesn't do it will work + for now, but might break in the future, which makes this API + change easy to miss. + + * The major soname has been bumped to 5.0.0. liblzma API and ABI + are now stable, so the need to recompile programs linking against + liblzma shouldn't arise soon. + diff --git a/usr/share/doc/xz/README b/usr/share/doc/xz/README new file mode 100644 index 0000000..ab8aadf --- /dev/null +++ b/usr/share/doc/xz/README @@ -0,0 +1,308 @@ + +XZ Utils +======== + + 0. Overview + 1. Documentation + 1.1. Overall documentation + 1.2. Documentation for command-line tools + 1.3. Documentation for liblzma + 2. Version numbering + 3. Reporting bugs + 4. Translating the xz tool + 5. Other implementations of the .xz format + 6. Contact information + + +0. Overview +----------- + + XZ Utils provide a general-purpose data-compression library plus + command-line tools. The native file format is the .xz format, but + also the legacy .lzma format is supported. The .xz format supports + multiple compression algorithms, which are called "filters" in the + context of XZ Utils. The primary filter is currently LZMA2. With + typical files, XZ Utils create about 30 % smaller files than gzip. + + To ease adapting support for the .xz format into existing applications + and scripts, the API of liblzma is somewhat similar to the API of the + popular zlib library. For the same reason, the command-line tool xz + has a command-line syntax similar to that of gzip. + + When aiming for the highest compression ratio, the LZMA2 encoder uses + a lot of CPU time and may use, depending on the settings, even + hundreds of megabytes of RAM. However, in fast modes, the LZMA2 encoder + competes with bzip2 in compression speed, RAM usage, and compression + ratio. + + LZMA2 is reasonably fast to decompress. It is a little slower than + gzip, but a lot faster than bzip2. Being fast to decompress means + that the .xz format is especially nice when the same file will be + decompressed very many times (usually on different computers), which + is the case e.g. when distributing software packages. In such + situations, it's not too bad if the compression takes some time, + since that needs to be done only once to benefit many people. + + With some file types, combining (or "chaining") LZMA2 with an + additional filter can improve the compression ratio. A filter chain may + contain up to four filters, although usually only one or two are used. + For example, putting a BCJ (Branch/Call/Jump) filter before LZMA2 + in the filter chain can improve compression ratio of executable files. + + Since the .xz format allows adding new filter IDs, it is possible that + some day there will be a filter that is, for example, much faster to + compress than LZMA2 (but probably with worse compression ratio). + Similarly, it is possible that some day there is a filter that will + compress better than LZMA2. + + XZ Utils doesn't support multithreaded compression or decompression + yet. It has been planned though and taken into account when designing + the .xz file format. + + +1. Documentation +---------------- + +1.1. Overall documentation + + README This file + + INSTALL.generic Generic install instructions for those not familiar + with packages using GNU Autotools + INSTALL Installation instructions specific to XZ Utils + PACKAGERS Information to packagers of XZ Utils + + COPYING XZ Utils copyright and license information + COPYING.GPLv2 GNU General Public License version 2 + COPYING.GPLv3 GNU General Public License version 3 + COPYING.LGPLv2.1 GNU Lesser General Public License version 2.1 + + AUTHORS The main authors of XZ Utils + THANKS Incomplete list of people who have helped making + this software + NEWS User-visible changes between XZ Utils releases + ChangeLog Detailed list of changes (commit log) + TODO Known bugs and some sort of to-do list + + Note that only some of the above files are included in binary + packages. + + +1.2. Documentation for command-line tools + + The command-line tools are documented as man pages. In source code + releases (and possibly also in some binary packages), the man pages + are also provided in plain text (ASCII only) and PDF formats in the + directory "doc/man" to make the man pages more accessible to those + whose operating system doesn't provide an easy way to view man pages. + + +1.3. Documentation for liblzma + + The liblzma API headers include short docs about each function + and data type as Doxygen tags. These docs should be quite OK as + a quick reference. + + I have planned to write a bunch of very well documented example + programs, which (due to comments) should work as a tutorial to + various features of liblzma. No such example programs have been + written yet. + + For now, if you have never used liblzma, libbzip2, or zlib, I + recommend learning the *basics* of the zlib API. Once you know that, + it should be easier to learn liblzma. + + http://zlib.net/manual.html + http://zlib.net/zlib_how.html + + +2. Version numbering +-------------------- + + The version number format of XZ Utils is X.Y.ZS: + + - X is the major version. When this is incremented, the library + API and ABI break. + + - Y is the minor version. It is incremented when new features + are added without breaking the existing API or ABI. An even Y + indicates a stable release and an odd Y indicates unstable + (alpha or beta version). + + - Z is the revision. This has a different meaning for stable and + unstable releases: + + * Stable: Z is incremented when bugs get fixed without adding + any new features. This is intended to be convenient for + downstream distributors that want bug fixes but don't want + any new features to minimize the risk of introducing new bugs. + + * Unstable: Z is just a counter. API or ABI of features added + in earlier unstable releases having the same X.Y may break. + + - S indicates stability of the release. It is missing from the + stable releases, where Y is an even number. When Y is odd, S + is either "alpha" or "beta" to make it very clear that such + versions are not stable releases. The same X.Y.Z combination is + not used for more than one stability level, i.e. after X.Y.Zalpha, + the next version can be X.Y.(Z+1)beta but not X.Y.Zbeta. + + +3. Reporting bugs +----------------- + + Naturally it is easiest for me if you already know what causes the + unexpected behavior. Even better if you have a patch to propose. + However, quite often the reason for unexpected behavior is unknown, + so here are a few things to do before sending a bug report: + + 1. Try to create a small example how to reproduce the issue. + + 2. Compile XZ Utils with debugging code using configure switches + --enable-debug and, if possible, --disable-shared. If you are + using GCC, use CFLAGS='-O0 -ggdb3'. Don't strip the resulting + binaries. + + 3. Turn on core dumps. The exact command depends on your shell; + for example in GNU bash it is done with "ulimit -c unlimited", + and in tcsh with "limit coredumpsize unlimited". + + 4. Try to reproduce the suspected bug. If you get "assertion failed" + message, be sure to include the complete message in your bug + report. If the application leaves a coredump, get a backtrace + using gdb: + $ gdb /path/to/app-binary # Load the app to the debugger. + (gdb) core core # Open the coredump. + (gdb) bt # Print the backtrace. Copy & paste to bug report. + (gdb) quit # Quit gdb. + + Report your bug via email or IRC (see Contact information below). + Don't send core dump files or any executables. If you have a small + example file(s) (total size less than 256 KiB), please include + it/them as an attachment. If you have bigger test files, put them + online somewhere and include a URL to the file(s) in the bug report. + + Always include the exact version number of XZ Utils in the bug report. + If you are using a snapshot from the git repository, use "git describe" + to get the exact snapshot version. If you are using XZ Utils shipped + in an operating system distribution, mention the distribution name, + distribution version, and exact xz package version; if you cannot + repeat the bug with the code compiled from unpatched source code, + you probably need to report a bug to your distribution's bug tracking + system. + + +4. Translating the xz tool +-------------------------- + + The messages from the xz tool have been translated into a few + languages. Before starting to translate into a new language, ask + the author whether someone else hasn't already started working on it. + + Test your translation. Testing includes comparing the translated + output to the original English version by running the same commands + in both your target locale and with LC_ALL=C. Ask someone to + proof-read and test the translation. + + Testing can be done e.g. by installing xz into a temporary directory: + + ./configure --disable-shared --prefix=/tmp/xz-test + # + make -C po update-po + make install + bash debug/translation.bash | less + bash debug/translation.bash | less -S # For --list outputs + + Repeat the above as needed (no need to re-run configure though). + + Note especially the following: + + - The output of --help and --long-help must look nice on + an 80-column terminal. It's OK to add extra lines if needed. + + - In contrast, don't add extra lines to error messages and such. + They are often preceded with e.g. a filename on the same line, + so you have no way to predict where to put a \n. Let the terminal + do the wrapping even if it looks ugly. Adding new lines will be + even uglier in the generic case even if it looks nice in a few + limited examples. + + - Be careful with column alignment in tables and table-like output + (--list, --list --verbose --verbose, --info-memory, --help, and + --long-help): + + * All descriptions of options in --help should start in the + same column (but it doesn't need to be the same column as + in the English messages; just be consistent if you change it). + Check that both --help and --long-help look OK, since they + share several strings. + + * --list --verbose and --info-memory print lines that have + the format "Description: %s". If you need a longer + description, you can put extra space between the colon + and %s. Then you may need to add extra space to other + strings too so that the result as a whole looks good (all + values start at the same column). + + * The columns of the actual tables in --list --verbose --verbose + should be aligned properly. Abbreviate if necessary. It might + be good to keep at least 2 or 3 spaces between column headings + and avoid spaces in the headings so that the columns stand out + better, but this is a matter of opinion. Do what you think + looks best. + + - Be careful to put a period at the end of a sentence when the + original version has it, and don't put it when the original + doesn't have it. Similarly, be careful with \n characters + at the beginning and end of the strings. + + - Read the TRANSLATORS comments that have been extracted from the + source code and included in xz.pot. If they suggest testing the + translation with some type of command, do it. If testing needs + input files, use e.g. tests/files/good-*.xz. + + - When updating the translation, read the fuzzy (modified) strings + carefully, and don't mark them as updated before you actually + have updated them. Reading through the unchanged messages can be + good too; sometimes you may find a better wording for them. + + - If you find language problems in the original English strings, + feel free to suggest improvements. Ask if something is unclear. + + - The translated messages should be understandable (sometimes this + may be a problem with the original English messages too). Don't + make a direct word-by-word translation from English especially if + the result doesn't sound good in your language. + + In short, take your time and pay attention to the details. Making + a good translation is not a quick and trivial thing to do. The + translated xz should look as polished as the English version. + + +5. Other implementations of the .xz format +------------------------------------------ + + 7-Zip and the p7zip port of 7-Zip support the .xz format starting + from the version 9.00alpha. + + http://7-zip.org/ + http://p7zip.sourceforge.net/ + + XZ Embedded is a limited implementation written for use in the Linux + kernel, but it is also suitable for other embedded use. + + http://tukaani.org/xz/embedded.html + + +6. Contact information +---------------------- + + If you have questions, bug reports, patches etc. related to XZ Utils, + contact Lasse Collin (in Finnish or English). + I'm sometimes slow at replying. If you haven't got a reply within two + weeks, assume that your email has got lost and resend it or use IRC. + + You can find me also from #tukaani on Freenode; my nick is Larhzu. + The channel tends to be pretty quiet, so just ask your question and + someone may wake up. + diff --git a/usr/share/doc/xz/THANKS b/usr/share/doc/xz/THANKS new file mode 100644 index 0000000..d9c62d4 --- /dev/null +++ b/usr/share/doc/xz/THANKS @@ -0,0 +1,116 @@ + +Thanks +====== + +Some people have helped more, some less, but nevertheless everyone's help +has been important. :-) In alphabetical order: + - Mark Adler + - H. Peter Anvin + - Jeff Bastian + - Nelson H. F. Beebe + - Karl Berry + - Anders F. Björklund + - Emmanuel Blot + - Martin Blumenstingl + - Jakub Bogusz + - Maarten Bosmans + - Trent W. Buck + - James Buren + - David Burklund + - Daniel Mealha Cabrita + - Milo Casagrande + - Marek Černocký + - Tomer Chachamu + - Gabi Davar + - Chris Donawa + - Andrew Dudman + - Markus Duft + - İsmail Dönmez + - Robert Elz + - Gilles Espinasse + - Denis Excoffier + - Michael Felt + - Michael Fox + - Mike Frysinger + - Daniel Richard G. + - Bill Glessner + - Jason Gorski + - Juan Manuel Guerrero + - Diederik de Haas + - Joachim Henke + - Christian Hesse + - Vincenzo Innocente + - Peter Ivanov + - Jouk Jansen + - Jun I Jin + - Per Øyvind Karlsen + - Thomas Klausner + - Richard Koch + - Ville Koskinen + - Jan Kratochvil + - Christian Kujau + - Stephan Kulow + - Peter Lawler + - James M Leddy + - Hin-Tak Leung + - Andraž 'ruskie' Levstik + - Cary Lewis + - Wim Lewis + - Lorenzo De Liso + - Bela Lubkin + - Gregory Margo + - Jim Meyering + - Arkadiusz Miskiewicz + - Conley Moorhous + - Rafał Mużyło + - Adrien Nader + - Hongbo Ni + - Jonathan Nieder + - Andre Noll + - Peter O'Gorman + - Peter Pallinger + - Rui Paulo + - Igor Pavlov + - Diego Elio Pettenò + - Elbert Pol + - Mikko Pouru + - Trần Ngọc Quân + - Pavel Raiskup + - Ole André Vadla Ravnås + - Robert Readman + - Bernhard Reutner-Fischer + - Eric S. Raymond + - Cristian Rodríguez + - Christian von Roques + - Torsten Rupp + - Jukka Salmi + - Alexandre Sauvé + - Benno Schulenberg + - Andreas Schwab + - Dan Shechter + - Stuart Shelton + - Brad Smith + - Jonathan Stott + - Dan Stromberg + - Vincent Torri + - Paul Townsend + - Mohammed Adnène Trojette + - Alexey Tourbin + - Patrick J. Volkerding + - Martin Väth + - Adam Walling + - Christian Weisgerber + - Bert Wesarg + - Fredrik Wikstrom + - Ralf Wildenhues + - Charles Wilson + - Lars Wirzenius + - Pilorz Wojciech + - Ryan Young + - Andreas Zieringer + +Also thanks to all the people who have participated in the Tukaani project. + +I have probably forgot to add some names to the above list. Sorry about +that and thanks for your help. + diff --git a/usr/share/doc/xz/TODO b/usr/share/doc/xz/TODO new file mode 100644 index 0000000..45ba433 --- /dev/null +++ b/usr/share/doc/xz/TODO @@ -0,0 +1,111 @@ + +XZ Utils To-Do List +=================== + +Known bugs +---------- + + The test suite is too incomplete. + + If the memory usage limit is less than about 13 MiB, xz is unable to + automatically scale down the compression settings enough even though + it would be possible by switching from BT2/BT3/BT4 match finder to + HC3/HC4. + + XZ Utils compress some files significantly worse than LZMA Utils. + This is due to faster compression presets used by XZ Utils, and + can often be worked around by using "xz --extreme". With some files + --extreme isn't enough though: it's most likely with files that + compress extremely well, so going from compression ratio of 0.003 + to 0.004 means big relative increase in the compressed file size. + + xz doesn't quote unprintable characters when it displays file names + given on the command line. + + tuklib_exit() doesn't block signals => EINTR is possible. + + SIGTSTP is not handled. If xz is stopped, the estimated remaining + time and calculated (de)compression speed won't make sense in the + progress indicator (xz --verbose). + + If liblzma has created threads and fork() gets called, liblzma + code will break in the child process unless it calls exec() and + doesn't touch liblzma. + + +Missing features +---------------- + + Add support for storing metadata in .xz files. A preliminary + idea is to create a new Stream type for metadata. When both + metadata and data are wanted in the same .xz file, two or more + Streams would be concatenated. + + The state stored in lzma_stream should be cloneable, which would + be mostly useful when using a preset dictionary in LZMA2, but + it may have other uses too. Compare to deflateCopy() in zlib. + + Support LZMA_FINISH in raw decoder to indicate end of LZMA1 and + other streams that don't have an end of payload marker. + + Adjust dictionary size when the input file size is known. + Maybe do this only if an option is given. + + xz doesn't support copying extended attributes, access control + lists etc. from source to target file. + + Multithreaded compression: + - Reduce memory usage of the current method. + - Implement threaded match finders. + - Implement pigz-style threading in LZMA2. + + Multithreaded decompression + + Buffer-to-buffer coding could use less RAM (especially when + decompressing LZMA1 or LZMA2). + + I/O library is not implemented (similar to gzopen() in zlib). + It will be a separate library that supports uncompressed, .gz, + .bz2, .lzma, and .xz files. + + Support changing lzma_options_lzma.mode with lzma_filters_update(). + + Support LZMA_FULL_FLUSH for lzma_stream_decoder() to stop at + Block and Stream boundaries. + + lzma_strerror() to convert lzma_ret to human readable form? + This is tricky, because the same error codes are used with + slightly different meanings, and this cannot be fixed anymore. + + Make it possible to adjust LZMA2 options in the middle of a Block + so that the encoding speed vs. compression ratio can be optimized + when the compressed data is streamed over network. + + Improved BCJ filters. The current filters are small but they aren't + so great when compressing binary packages that contain various file + types. Specifically, they make things worse if there are static + libraries or Linux kernel modules. The filtering could also be + more effective (without getting overly complex), for example, + streamable variant BCJ2 from 7-Zip could be implemented. + + Filter that autodetects specific data types in the input stream + and applies appropriate filters for the corrects parts of the input. + Perhaps combine this with the BCJ filter improvement point above. + + Long-range LZ77 method as a separate filter or as a new LZMA2 + match finder. + + +Documentation +------------- + + More tutorial programs are needed for liblzma. + + Document the LZMA1 and LZMA2 algorithms. + + +Miscellaneous +------------ + + Try to get the media type for .xz registered at IANA. + diff --git a/usr/share/doc/xz/examples/00_README.txt b/usr/share/doc/xz/examples/00_README.txt new file mode 100644 index 0000000..120e1eb --- /dev/null +++ b/usr/share/doc/xz/examples/00_README.txt @@ -0,0 +1,31 @@ + +liblzma example programs +======================== + +Introduction + + The examples are written so that the same comments aren't + repeated (much) in later files. + + On POSIX systems, the examples should build by just typing "make". + + The examples that use stdin or stdout don't set stdin and stdout + to binary mode. On systems where it matters (e.g. Windows) it is + possible that the examples won't work without modification. + + +List of examples + + 01_compress_easy.c Multi-call compression using + a compression preset + + 02_decompress.c Multi-call decompression + + 03_compress_custom.c Like 01_compress_easy.c but using + a custom filter chain + (x86 BCJ + LZMA2) + + 04_compress_easy_mt.c Multi-threaded multi-call + compression using a compression + preset + diff --git a/usr/share/doc/xz/examples/01_compress_easy.c b/usr/share/doc/xz/examples/01_compress_easy.c new file mode 100644 index 0000000..e6dd2b0 --- /dev/null +++ b/usr/share/doc/xz/examples/01_compress_easy.c @@ -0,0 +1,297 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file 01_compress_easy.c +/// \brief Compress from stdin to stdout in multi-call mode +/// +/// Usage: ./01_compress_easy PRESET < INFILE > OUTFILE +/// +/// Example: ./01_compress_easy 6 < foo > foo.xz +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include + + +static void +show_usage_and_exit(const char *argv0) +{ + fprintf(stderr, "Usage: %s PRESET < INFILE > OUTFILE\n" + "PRESET is a number 0-9 and can optionally be " + "followed by `e' to indicate extreme preset\n", + argv0); + exit(EXIT_FAILURE); +} + + +static uint32_t +get_preset(int argc, char **argv) +{ + // One argument whose first char must be 0-9. + if (argc != 2 || argv[1][0] < '0' || argv[1][0] > '9') + show_usage_and_exit(argv[0]); + + // Calculate the preste level 0-9. + uint32_t preset = argv[1][0] - '0'; + + // If there is a second char, it must be 'e'. It will set + // the LZMA_PRESET_EXTREME flag. + if (argv[1][1] != '\0') { + if (argv[1][1] != 'e' || argv[1][2] != '\0') + show_usage_and_exit(argv[0]); + + preset |= LZMA_PRESET_EXTREME; + } + + return preset; +} + + +static bool +init_encoder(lzma_stream *strm, uint32_t preset) +{ + // Initialize the encoder using a preset. Set the integrity to check + // to CRC64, which is the default in the xz command line tool. If + // the .xz file needs to be decompressed with XZ Embedded, use + // LZMA_CHECK_CRC32 instead. + lzma_ret ret = lzma_easy_encoder(strm, preset, LZMA_CHECK_CRC64); + + // Return successfully if the initialization went fine. + if (ret == LZMA_OK) + return true; + + // Something went wrong. The possible errors are documented in + // lzma/container.h (src/liblzma/api/lzma/container.h in the source + // package or e.g. /usr/include/lzma/container.h depending on the + // install prefix). + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_OPTIONS_ERROR: + msg = "Specified preset is not supported"; + break; + + case LZMA_UNSUPPORTED_CHECK: + msg = "Specified integrity check is not supported"; + break; + + default: + // This is most likely LZMA_PROG_ERROR indicating a bug in + // this program or in liblzma. It is inconvenient to have a + // separate error message for errors that should be impossible + // to occur, but knowing the error code is important for + // debugging. That's why it is good to print the error code + // at least when there is no good error message to show. + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n", + msg, ret); + return false; +} + + +static bool +compress(lzma_stream *strm, FILE *infile, FILE *outfile) +{ + // This will be LZMA_RUN until the end of the input file is reached. + // This tells lzma_code() when there will be no more input. + lzma_action action = LZMA_RUN; + + // Buffers to temporarily hold uncompressed input + // and compressed output. + uint8_t inbuf[BUFSIZ]; + uint8_t outbuf[BUFSIZ]; + + // Initialize the input and output pointers. Initializing next_in + // and avail_in isn't really necessary when we are going to encode + // just one file since LZMA_STREAM_INIT takes care of initializing + // those already. But it doesn't hurt much and it will be needed + // if encoding more than one file like we will in 02_decompress.c. + // + // While we don't care about strm->total_in or strm->total_out in this + // example, it is worth noting that initializing the encoder will + // always reset total_in and total_out to zero. But the encoder + // initialization doesn't touch next_in, avail_in, next_out, or + // avail_out. + strm->next_in = NULL; + strm->avail_in = 0; + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + + // Loop until the file has been successfully compressed or until + // an error occurs. + while (true) { + // Fill the input buffer if it is empty. + if (strm->avail_in == 0 && !feof(infile)) { + strm->next_in = inbuf; + strm->avail_in = fread(inbuf, 1, sizeof(inbuf), + infile); + + if (ferror(infile)) { + fprintf(stderr, "Read error: %s\n", + strerror(errno)); + return false; + } + + // Once the end of the input file has been reached, + // we need to tell lzma_code() that no more input + // will be coming and that it should finish the + // encoding. + if (feof(infile)) + action = LZMA_FINISH; + } + + // Tell liblzma do the actual encoding. + // + // This reads up to strm->avail_in bytes of input starting + // from strm->next_in. avail_in will be decremented and + // next_in incremented by an equal amount to match the + // number of input bytes consumed. + // + // Up to strm->avail_out bytes of compressed output will be + // written starting from strm->next_out. avail_out and next_out + // will be incremented by an equal amount to match the number + // of output bytes written. + // + // The encoder has to do internal buffering, which means that + // it may take quite a bit of input before the same data is + // available in compressed form in the output buffer. + lzma_ret ret = lzma_code(strm, action); + + // If the output buffer is full or if the compression finished + // successfully, write the data from the output bufffer to + // the output file. + if (strm->avail_out == 0 || ret == LZMA_STREAM_END) { + // When lzma_code() has returned LZMA_STREAM_END, + // the output buffer is likely to be only partially + // full. Calculate how much new data there is to + // be written to the output file. + size_t write_size = sizeof(outbuf) - strm->avail_out; + + if (fwrite(outbuf, 1, write_size, outfile) + != write_size) { + fprintf(stderr, "Write error: %s\n", + strerror(errno)); + return false; + } + + // Reset next_out and avail_out. + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + } + + // Normally the return value of lzma_code() will be LZMA_OK + // until everything has been encoded. + if (ret != LZMA_OK) { + // Once everything has been encoded successfully, the + // return value of lzma_code() will be LZMA_STREAM_END. + // + // It is important to check for LZMA_STREAM_END. Do not + // assume that getting ret != LZMA_OK would mean that + // everything has gone well. + if (ret == LZMA_STREAM_END) + return true; + + // It's not LZMA_OK nor LZMA_STREAM_END, + // so it must be an error code. See lzma/base.h + // (src/liblzma/api/lzma/base.h in the source package + // or e.g. /usr/include/lzma/base.h depending on the + // install prefix) for the list and documentation of + // possible values. Most values listen in lzma_ret + // enumeration aren't possible in this example. + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_DATA_ERROR: + // This error is returned if the compressed + // or uncompressed size get near 8 EiB + // (2^63 bytes) because that's where the .xz + // file format size limits currently are. + // That is, the possibility of this error + // is mostly theoretical unless you are doing + // something very unusual. + // + // Note that strm->total_in and strm->total_out + // have nothing to do with this error. Changing + // those variables won't increase or decrease + // the chance of getting this error. + msg = "File size limits exceeded"; + break; + + default: + // This is most likely LZMA_PROG_ERROR, but + // if this program is buggy (or liblzma has + // a bug), it may be e.g. LZMA_BUF_ERROR or + // LZMA_OPTIONS_ERROR too. + // + // It is inconvenient to have a separate + // error message for errors that should be + // impossible to occur, but knowing the error + // code is important for debugging. That's why + // it is good to print the error code at least + // when there is no good error message to show. + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "Encoder error: %s (error code %u)\n", + msg, ret); + return false; + } + } +} + + +extern int +main(int argc, char **argv) +{ + // Get the preset number from the command line. + uint32_t preset = get_preset(argc, argv); + + // Initialize a lzma_stream structure. When it is allocated on stack, + // it is simplest to use LZMA_STREAM_INIT macro like below. When it + // is allocated on heap, using memset(strmptr, 0, sizeof(*strmptr)) + // works (as long as NULL pointers are represented with zero bits + // as they are on practically all computers today). + lzma_stream strm = LZMA_STREAM_INIT; + + // Initialize the encoder. If it succeeds, compress from + // stdin to stdout. + bool success = init_encoder(&strm, preset); + if (success) + success = compress(&strm, stdin, stdout); + + // Free the memory allocated for the encoder. If we were encoding + // multiple files, this would only need to be done after the last + // file. See 02_decompress.c for handling of multiple files. + // + // It is OK to call lzma_end() multiple times or when it hasn't been + // actually used except initialized with LZMA_STREAM_INIT. + lzma_end(&strm); + + // Close stdout to catch possible write errors that can occur + // when pending data is flushed from the stdio buffers. + if (fclose(stdout)) { + fprintf(stderr, "Write error: %s\n", strerror(errno)); + success = false; + } + + return success ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/usr/share/doc/xz/examples/02_decompress.c b/usr/share/doc/xz/examples/02_decompress.c new file mode 100644 index 0000000..4c0f37c --- /dev/null +++ b/usr/share/doc/xz/examples/02_decompress.c @@ -0,0 +1,287 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file 02_decompress.c +/// \brief Decompress .xz files to stdout +/// +/// Usage: ./02_decompress INPUT_FILES... > OUTFILE +/// +/// Example: ./02_decompress foo.xz bar.xz > foobar +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include + + +static bool +init_decoder(lzma_stream *strm) +{ + // Initialize a .xz decoder. The decoder supports a memory usage limit + // and a set of flags. + // + // The memory usage of the decompressor depends on the settings used + // to compress a .xz file. It can vary from less than a megabyte to + // a few gigabytes, but in practice (at least for now) it rarely + // exceeds 65 MiB because that's how much memory is required to + // decompress files created with "xz -9". Settings requiring more + // memory take extra effort to use and don't (at least for now) + // provide significantly better compression in most cases. + // + // Memory usage limit is useful if it is important that the + // decompressor won't consume gigabytes of memory. The need + // for limiting depends on the application. In this example, + // no memory usage limiting is used. This is done by setting + // the limit to UINT64_MAX. + // + // The .xz format allows concatenating compressed files as is: + // + // echo foo | xz > foobar.xz + // echo bar | xz >> foobar.xz + // + // When decompressing normal standalone .xz files, LZMA_CONCATENATED + // should always be used to support decompression of concatenated + // .xz files. If LZMA_CONCATENATED isn't used, the decoder will stop + // after the first .xz stream. This can be useful when .xz data has + // been embedded inside another file format. + // + // Flags other than LZMA_CONCATENATED are supported too, and can + // be combined with bitwise-or. See lzma/container.h + // (src/liblzma/api/lzma/container.h in the source package or e.g. + // /usr/include/lzma/container.h depending on the install prefix) + // for details. + lzma_ret ret = lzma_stream_decoder( + strm, UINT64_MAX, LZMA_CONCATENATED); + + // Return successfully if the initialization went fine. + if (ret == LZMA_OK) + return true; + + // Something went wrong. The possible errors are documented in + // lzma/container.h (src/liblzma/api/lzma/container.h in the source + // package or e.g. /usr/include/lzma/container.h depending on the + // install prefix). + // + // Note that LZMA_MEMLIMIT_ERROR is never possible here. If you + // specify a very tiny limit, the error will be delayed until + // the first headers have been parsed by a call to lzma_code(). + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_OPTIONS_ERROR: + msg = "Unsupported decompressor flags"; + break; + + default: + // This is most likely LZMA_PROG_ERROR indicating a bug in + // this program or in liblzma. It is inconvenient to have a + // separate error message for errors that should be impossible + // to occur, but knowing the error code is important for + // debugging. That's why it is good to print the error code + // at least when there is no good error message to show. + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "Error initializing the decoder: %s (error code %u)\n", + msg, ret); + return false; +} + + +static bool +decompress(lzma_stream *strm, const char *inname, FILE *infile, FILE *outfile) +{ + // When LZMA_CONCATENATED flag was used when initializing the decoder, + // we need to tell lzma_code() when there will be no more input. + // This is done by setting action to LZMA_FINISH instead of LZMA_RUN + // in the same way as it is done when encoding. + // + // When LZMA_CONCATENATED isn't used, there is no need to use + // LZMA_FINISH to tell when all the input has been read, but it + // is still OK to use it if you want. When LZMA_CONCATENATED isn't + // used, the decoder will stop after the first .xz stream. In that + // case some unused data may be left in strm->next_in. + lzma_action action = LZMA_RUN; + + uint8_t inbuf[BUFSIZ]; + uint8_t outbuf[BUFSIZ]; + + strm->next_in = NULL; + strm->avail_in = 0; + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + + while (true) { + if (strm->avail_in == 0 && !feof(infile)) { + strm->next_in = inbuf; + strm->avail_in = fread(inbuf, 1, sizeof(inbuf), + infile); + + if (ferror(infile)) { + fprintf(stderr, "%s: Read error: %s\n", + inname, strerror(errno)); + return false; + } + + // Once the end of the input file has been reached, + // we need to tell lzma_code() that no more input + // will be coming. As said before, this isn't required + // if the LZMA_CONATENATED flag isn't used when + // initializing the decoder. + if (feof(infile)) + action = LZMA_FINISH; + } + + lzma_ret ret = lzma_code(strm, action); + + if (strm->avail_out == 0 || ret == LZMA_STREAM_END) { + size_t write_size = sizeof(outbuf) - strm->avail_out; + + if (fwrite(outbuf, 1, write_size, outfile) + != write_size) { + fprintf(stderr, "Write error: %s\n", + strerror(errno)); + return false; + } + + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + } + + if (ret != LZMA_OK) { + // Once everything has been decoded successfully, the + // return value of lzma_code() will be LZMA_STREAM_END. + // + // It is important to check for LZMA_STREAM_END. Do not + // assume that getting ret != LZMA_OK would mean that + // everything has gone well or that when you aren't + // getting more output it must have successfully + // decoded everything. + if (ret == LZMA_STREAM_END) + return true; + + // It's not LZMA_OK nor LZMA_STREAM_END, + // so it must be an error code. See lzma/base.h + // (src/liblzma/api/lzma/base.h in the source package + // or e.g. /usr/include/lzma/base.h depending on the + // install prefix) for the list and documentation of + // possible values. Many values listen in lzma_ret + // enumeration aren't possible in this example, but + // can be made possible by enabling memory usage limit + // or adding flags to the decoder initialization. + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_FORMAT_ERROR: + // .xz magic bytes weren't found. + msg = "The input is not in the .xz format"; + break; + + case LZMA_OPTIONS_ERROR: + // For example, the headers specify a filter + // that isn't supported by this liblzma + // version (or it hasn't been enabled when + // building liblzma, but no-one sane does + // that unless building liblzma for an + // embedded system). Upgrading to a newer + // liblzma might help. + // + // Note that it is unlikely that the file has + // accidentally became corrupt if you get this + // error. The integrity of the .xz headers is + // always verified with a CRC32, so + // unintentionally corrupt files can be + // distinguished from unsupported files. + msg = "Unsupported compression options"; + break; + + case LZMA_DATA_ERROR: + msg = "Compressed file is corrupt"; + break; + + case LZMA_BUF_ERROR: + // Typically this error means that a valid + // file has got truncated, but it might also + // be a damaged part in the file that makes + // the decoder think the file is truncated. + // If you prefer, you can use the same error + // message for this as for LZMA_DATA_ERROR. + msg = "Compressed file is truncated or " + "otherwise corrupt"; + break; + + default: + // This is most likely LZMA_PROG_ERROR. + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "%s: Decoder error: " + "%s (error code %u)\n", + inname, msg, ret); + return false; + } + } +} + + +extern int +main(int argc, char **argv) +{ + if (argc <= 1) { + fprintf(stderr, "Usage: %s FILES...\n", argv[0]); + return EXIT_FAILURE; + } + + lzma_stream strm = LZMA_STREAM_INIT; + + bool success = true; + + // Try to decompress all files. + for (int i = 1; i < argc; ++i) { + if (!init_decoder(&strm)) { + // Decoder initialization failed. There's no point + // to retry it so we need to exit. + success = false; + break; + } + + FILE *infile = fopen(argv[i], "rb"); + + if (infile == NULL) { + fprintf(stderr, "%s: Error opening the " + "input file: %s\n", + argv[i], strerror(errno)); + success = false; + } else { + success &= decompress(&strm, argv[i], infile, stdout); + fclose(infile); + } + } + + // Free the memory allocated for the decoder. This only needs to be + // done after the last file. + lzma_end(&strm); + + if (fclose(stdout)) { + fprintf(stderr, "Write error: %s\n", strerror(errno)); + success = false; + } + + return success ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/usr/share/doc/xz/examples/03_compress_custom.c b/usr/share/doc/xz/examples/03_compress_custom.c new file mode 100644 index 0000000..40c85e3 --- /dev/null +++ b/usr/share/doc/xz/examples/03_compress_custom.c @@ -0,0 +1,193 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file 03_compress_custom.c +/// \brief Compress in multi-call mode using x86 BCJ and LZMA2 +/// +/// Usage: ./03_compress_custom < INFILE > OUTFILE +/// +/// Example: ./03_compress_custom < foo > foo.xz +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include + + +static bool +init_encoder(lzma_stream *strm) +{ + // Use the default preset (6) for LZMA2. + // + // The lzma_options_lzma structure and the lzma_lzma_preset() function + // are declared in lzma/lzma12.h (src/liblzma/api/lzma/lzma12.h in the + // source package or e.g. /usr/include/lzma/lzma12.h depending on + // the install prefix). + lzma_options_lzma opt_lzma2; + if (lzma_lzma_preset(&opt_lzma2, LZMA_PRESET_DEFAULT)) { + // It should never fail because the default preset + // (and presets 0-9 optionally with LZMA_PRESET_EXTREME) + // are supported by all stable liblzma versions. + // + // (The encoder initialization later in this function may + // still fail due to unsupported preset *if* the features + // required by the preset have been disabled at build time, + // but no-one does such things except on embedded systems.) + fprintf(stderr, "Unsupported preset, possibly a bug\n"); + return false; + } + + // Now we could customize the LZMA2 options if we wanted. For example, + // we could set the the dictionary size (opt_lzma2.dict_size) to + // something else than the default (8 MiB) of the default preset. + // See lzma/lzma12.h for details of all LZMA2 options. + // + // The x86 BCJ filter will try to modify the x86 instruction stream so + // that LZMA2 can compress it better. The x86 BCJ filter doesn't need + // any options so it will be set to NULL below. + // + // Construct the filter chain. The uncompressed data goes first to + // the first filter in the array, in this case the x86 BCJ filter. + // The array is always terminated by setting .id = LZMA_VLI_UNKNOWN. + // + // See lzma/filter.h for more information about the lzma_filter + // structure. + lzma_filter filters[] = { + { .id = LZMA_FILTER_X86, .options = NULL }, + { .id = LZMA_FILTER_LZMA2, .options = &opt_lzma2 }, + { .id = LZMA_VLI_UNKNOWN, .options = NULL }, + }; + + // Initialize the encoder using the custom filter chain. + lzma_ret ret = lzma_stream_encoder(strm, filters, LZMA_CHECK_CRC64); + + if (ret == LZMA_OK) + return true; + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_OPTIONS_ERROR: + // We are no longer using a plain preset so this error + // message has been edited accordingly compared to + // 01_compress_easy.c. + msg = "Specified filter chain is not supported"; + break; + + case LZMA_UNSUPPORTED_CHECK: + msg = "Specified integrity check is not supported"; + break; + + default: + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n", + msg, ret); + return false; +} + + +// This function is identical to the one in 01_compress_easy.c. +static bool +compress(lzma_stream *strm, FILE *infile, FILE *outfile) +{ + lzma_action action = LZMA_RUN; + + uint8_t inbuf[BUFSIZ]; + uint8_t outbuf[BUFSIZ]; + + strm->next_in = NULL; + strm->avail_in = 0; + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + + while (true) { + if (strm->avail_in == 0 && !feof(infile)) { + strm->next_in = inbuf; + strm->avail_in = fread(inbuf, 1, sizeof(inbuf), + infile); + + if (ferror(infile)) { + fprintf(stderr, "Read error: %s\n", + strerror(errno)); + return false; + } + + if (feof(infile)) + action = LZMA_FINISH; + } + + lzma_ret ret = lzma_code(strm, action); + + if (strm->avail_out == 0 || ret == LZMA_STREAM_END) { + size_t write_size = sizeof(outbuf) - strm->avail_out; + + if (fwrite(outbuf, 1, write_size, outfile) + != write_size) { + fprintf(stderr, "Write error: %s\n", + strerror(errno)); + return false; + } + + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) + return true; + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_DATA_ERROR: + msg = "File size limits exceeded"; + break; + + default: + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "Encoder error: %s (error code %u)\n", + msg, ret); + return false; + } + } +} + + +extern int +main(void) +{ + lzma_stream strm = LZMA_STREAM_INIT; + + bool success = init_encoder(&strm); + if (success) + success = compress(&strm, stdin, stdout); + + lzma_end(&strm); + + if (fclose(stdout)) { + fprintf(stderr, "Write error: %s\n", strerror(errno)); + success = false; + } + + return success ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/usr/share/doc/xz/examples/04_compress_easy_mt.c b/usr/share/doc/xz/examples/04_compress_easy_mt.c new file mode 100644 index 0000000..efe5697 --- /dev/null +++ b/usr/share/doc/xz/examples/04_compress_easy_mt.c @@ -0,0 +1,206 @@ +/////////////////////////////////////////////////////////////////////////////// +// +/// \file 04_compress_easy_mt.c +/// \brief Compress in multi-call mode using LZMA2 in multi-threaded mode +/// +/// Usage: ./04_compress_easy_mt < INFILE > OUTFILE +/// +/// Example: ./04_compress_easy_mt < foo > foo.xz +// +// Author: Lasse Collin +// +// This file has been put into the public domain. +// You can do whatever you want with this file. +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include + + +static bool +init_encoder(lzma_stream *strm) +{ + // The threaded encoder takes the options as pointer to + // a lzma_mt structure. + lzma_mt mt = { + // No flags are needed. + .flags = 0, + + // Let liblzma determine a sane block size. + .block_size = 0, + + // Use no timeout for lzma_code() calls by setting timeout + // to zero. That is, sometimes lzma_code() might block for + // a long time (from several seconds to even minutes). + // If this is not OK, for example due to progress indicator + // needing updates, specify a timeout in milliseconds here. + // See the documentation of lzma_mt in lzma/container.h for + // information how to choose a reasonable timeout. + .timeout = 0, + + // Use the default preset (6) for LZMA2. + // To use a preset, filters must be set to NULL. + .preset = LZMA_PRESET_DEFAULT, + .filters = NULL, + + // Use CRC64 for integrity checking. See also + // 01_compress_easy.c about choosing the integrity check. + .check = LZMA_CHECK_CRC64, + }; + + // Detect how many threads the CPU supports. + mt.threads = lzma_cputhreads(); + + // If the number of CPU cores/threads cannot be detected, + // use one thread. Note that this isn't the same as the normal + // single-threaded mode as this will still split the data into + // blocks and use more RAM than the normal single-threaded mode. + // You may want to consider using lzma_easy_encoder() or + // lzma_stream_encoder() instead of lzma_stream_encoder_mt() if + // lzma_cputhreads() returns 0 or 1. + if (mt.threads == 0) + mt.threads = 1; + + // If the number of CPU cores/threads exceeds threads_max, + // limit the number of threads to keep memory usage lower. + // The number 8 is arbitrarily chosen and may be too low or + // high depending on the compression preset and the computer + // being used. + // + // FIXME: A better way could be to check the amount of RAM + // (or available RAM) and use lzma_stream_encoder_mt_memusage() + // to determine if the number of threads should be reduced. + const uint32_t threads_max = 8; + if (mt.threads > threads_max) + mt.threads = threads_max; + + // Initialize the threaded encoder. + lzma_ret ret = lzma_stream_encoder_mt(strm, &mt); + + if (ret == LZMA_OK) + return true; + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_OPTIONS_ERROR: + // We are no longer using a plain preset so this error + // message has been edited accordingly compared to + // 01_compress_easy.c. + msg = "Specified filter chain is not supported"; + break; + + case LZMA_UNSUPPORTED_CHECK: + msg = "Specified integrity check is not supported"; + break; + + default: + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "Error initializing the encoder: %s (error code %u)\n", + msg, ret); + return false; +} + + +// This function is identical to the one in 01_compress_easy.c. +static bool +compress(lzma_stream *strm, FILE *infile, FILE *outfile) +{ + lzma_action action = LZMA_RUN; + + uint8_t inbuf[BUFSIZ]; + uint8_t outbuf[BUFSIZ]; + + strm->next_in = NULL; + strm->avail_in = 0; + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + + while (true) { + if (strm->avail_in == 0 && !feof(infile)) { + strm->next_in = inbuf; + strm->avail_in = fread(inbuf, 1, sizeof(inbuf), + infile); + + if (ferror(infile)) { + fprintf(stderr, "Read error: %s\n", + strerror(errno)); + return false; + } + + if (feof(infile)) + action = LZMA_FINISH; + } + + lzma_ret ret = lzma_code(strm, action); + + if (strm->avail_out == 0 || ret == LZMA_STREAM_END) { + size_t write_size = sizeof(outbuf) - strm->avail_out; + + if (fwrite(outbuf, 1, write_size, outfile) + != write_size) { + fprintf(stderr, "Write error: %s\n", + strerror(errno)); + return false; + } + + strm->next_out = outbuf; + strm->avail_out = sizeof(outbuf); + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) + return true; + + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = "Memory allocation failed"; + break; + + case LZMA_DATA_ERROR: + msg = "File size limits exceeded"; + break; + + default: + msg = "Unknown error, possibly a bug"; + break; + } + + fprintf(stderr, "Encoder error: %s (error code %u)\n", + msg, ret); + return false; + } + } +} + + +extern int +main(void) +{ + lzma_stream strm = LZMA_STREAM_INIT; + + bool success = init_encoder(&strm); + if (success) + success = compress(&strm, stdin, stdout); + + lzma_end(&strm); + + if (fclose(stdout)) { + fprintf(stderr, "Write error: %s\n", strerror(errno)); + success = false; + } + + return success ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/usr/share/doc/xz/examples/Makefile b/usr/share/doc/xz/examples/Makefile new file mode 100644 index 0000000..0f3d185 --- /dev/null +++ b/usr/share/doc/xz/examples/Makefile @@ -0,0 +1,24 @@ +# +# Author: Lasse Collin +# +# This file has been put into the public domain. +# You can do whatever you want with this file. +# + +CC = c99 +CFLAGS = -g +LDFLAGS = -llzma + +PROGS = \ + 01_compress_easy \ + 02_decompress \ + 03_compress_custom \ + 04_compress_easy_mt + +all: $(PROGS) + +.c: + $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) + +clean: + -rm -f $(PROGS) diff --git a/usr/share/doc/xz/examples_old/xz_pipe_comp.c b/usr/share/doc/xz/examples_old/xz_pipe_comp.c new file mode 100644 index 0000000..9f9224b --- /dev/null +++ b/usr/share/doc/xz/examples_old/xz_pipe_comp.c @@ -0,0 +1,127 @@ +/* + * xz_pipe_comp.c + * A simple example of pipe-only xz compressor implementation. + * version: 2010-07-12 - by Daniel Mealha Cabrita + * Not copyrighted -- provided to the public domain. + * + * Compiling: + * Link with liblzma. GCC example: + * $ gcc -llzma xz_pipe_comp.c -o xz_pipe_comp + * + * Usage example: + * $ cat some_file | ./xz_pipe_comp > some_file.xz + */ + +#include +#include +#include +#include +#include + + +/* COMPRESSION SETTINGS */ + +/* analogous to xz CLI options: -0 to -9 */ +#define COMPRESSION_LEVEL 6 + +/* boolean setting, analogous to xz CLI option: -e */ +#define COMPRESSION_EXTREME true + +/* see: /usr/include/lzma/check.h LZMA_CHECK_* */ +#define INTEGRITY_CHECK LZMA_CHECK_CRC64 + + +/* read/write buffer sizes */ +#define IN_BUF_MAX 4096 +#define OUT_BUF_MAX 4096 + +/* error codes */ +#define RET_OK 0 +#define RET_ERROR_INIT 1 +#define RET_ERROR_INPUT 2 +#define RET_ERROR_OUTPUT 3 +#define RET_ERROR_COMPRESSION 4 + + +/* note: in_file and out_file must be open already */ +int xz_compress (FILE *in_file, FILE *out_file) +{ + uint32_t preset = COMPRESSION_LEVEL | (COMPRESSION_EXTREME ? LZMA_PRESET_EXTREME : 0); + lzma_check check = INTEGRITY_CHECK; + lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */ + uint8_t in_buf [IN_BUF_MAX]; + uint8_t out_buf [OUT_BUF_MAX]; + size_t in_len; /* length of useful data in in_buf */ + size_t out_len; /* length of useful data in out_buf */ + bool in_finished = false; + bool out_finished = false; + lzma_action action; + lzma_ret ret_xz; + int ret; + + ret = RET_OK; + + /* initialize xz encoder */ + ret_xz = lzma_easy_encoder (&strm, preset, check); + if (ret_xz != LZMA_OK) { + fprintf (stderr, "lzma_easy_encoder error: %d\n", (int) ret_xz); + return RET_ERROR_INIT; + } + + while ((! in_finished) && (! out_finished)) { + /* read incoming data */ + in_len = fread (in_buf, 1, IN_BUF_MAX, in_file); + + if (feof (in_file)) { + in_finished = true; + } + if (ferror (in_file)) { + in_finished = true; + ret = RET_ERROR_INPUT; + } + + strm.next_in = in_buf; + strm.avail_in = in_len; + + /* if no more data from in_buf, flushes the + internal xz buffers and closes the xz data + with LZMA_FINISH */ + action = in_finished ? LZMA_FINISH : LZMA_RUN; + + /* loop until there's no pending compressed output */ + do { + /* out_buf is clean at this point */ + strm.next_out = out_buf; + strm.avail_out = OUT_BUF_MAX; + + /* compress data */ + ret_xz = lzma_code (&strm, action); + + if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) { + fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz); + out_finished = true; + ret = RET_ERROR_COMPRESSION; + } else { + /* write compressed data */ + out_len = OUT_BUF_MAX - strm.avail_out; + fwrite (out_buf, 1, out_len, out_file); + if (ferror (out_file)) { + out_finished = true; + ret = RET_ERROR_OUTPUT; + } + } + } while (strm.avail_out == 0); + } + + lzma_end (&strm); + return ret; +} + +int main () +{ + int ret; + + ret = xz_compress (stdin, stdout); + return ret; +} + diff --git a/usr/share/doc/xz/examples_old/xz_pipe_decomp.c b/usr/share/doc/xz/examples_old/xz_pipe_decomp.c new file mode 100644 index 0000000..fb5ad89 --- /dev/null +++ b/usr/share/doc/xz/examples_old/xz_pipe_decomp.c @@ -0,0 +1,123 @@ +/* + * xz_pipe_decomp.c + * A simple example of pipe-only xz decompressor implementation. + * version: 2012-06-14 - by Daniel Mealha Cabrita + * Not copyrighted -- provided to the public domain. + * + * Compiling: + * Link with liblzma. GCC example: + * $ gcc -llzma xz_pipe_decomp.c -o xz_pipe_decomp + * + * Usage example: + * $ cat some_file.xz | ./xz_pipe_decomp > some_file + */ + +#include +#include +#include +#include +#include + + +/* read/write buffer sizes */ +#define IN_BUF_MAX 4096 +#define OUT_BUF_MAX 4096 + +/* error codes */ +#define RET_OK 0 +#define RET_ERROR_INIT 1 +#define RET_ERROR_INPUT 2 +#define RET_ERROR_OUTPUT 3 +#define RET_ERROR_DECOMPRESSION 4 + + +/* note: in_file and out_file must be open already */ +int xz_decompress (FILE *in_file, FILE *out_file) +{ + lzma_stream strm = LZMA_STREAM_INIT; /* alloc and init lzma_stream struct */ + const uint32_t flags = LZMA_TELL_UNSUPPORTED_CHECK | LZMA_CONCATENATED; + const uint64_t memory_limit = UINT64_MAX; /* no memory limit */ + uint8_t in_buf [IN_BUF_MAX]; + uint8_t out_buf [OUT_BUF_MAX]; + size_t in_len; /* length of useful data in in_buf */ + size_t out_len; /* length of useful data in out_buf */ + bool in_finished = false; + bool out_finished = false; + lzma_action action; + lzma_ret ret_xz; + int ret; + + ret = RET_OK; + + /* initialize xz decoder */ + ret_xz = lzma_stream_decoder (&strm, memory_limit, flags); + if (ret_xz != LZMA_OK) { + fprintf (stderr, "lzma_stream_decoder error: %d\n", (int) ret_xz); + return RET_ERROR_INIT; + } + + while ((! in_finished) && (! out_finished)) { + /* read incoming data */ + in_len = fread (in_buf, 1, IN_BUF_MAX, in_file); + + if (feof (in_file)) { + in_finished = true; + } + if (ferror (in_file)) { + in_finished = true; + ret = RET_ERROR_INPUT; + } + + strm.next_in = in_buf; + strm.avail_in = in_len; + + /* if no more data from in_buf, flushes the + internal xz buffers and closes the decompressed data + with LZMA_FINISH */ + action = in_finished ? LZMA_FINISH : LZMA_RUN; + + /* loop until there's no pending decompressed output */ + do { + /* out_buf is clean at this point */ + strm.next_out = out_buf; + strm.avail_out = OUT_BUF_MAX; + + /* decompress data */ + ret_xz = lzma_code (&strm, action); + + if ((ret_xz != LZMA_OK) && (ret_xz != LZMA_STREAM_END)) { + fprintf (stderr, "lzma_code error: %d\n", (int) ret_xz); + out_finished = true; + ret = RET_ERROR_DECOMPRESSION; + } else { + /* write decompressed data */ + out_len = OUT_BUF_MAX - strm.avail_out; + fwrite (out_buf, 1, out_len, out_file); + if (ferror (out_file)) { + out_finished = true; + ret = RET_ERROR_OUTPUT; + } + } + } while (strm.avail_out == 0); + } + + /* Bug fix (2012-06-14): If no errors were detected, check + that the last lzma_code() call returned LZMA_STREAM_END. + If not, the file is probably truncated. */ + if ((ret == RET_OK) && (ret_xz != LZMA_STREAM_END)) { + fprintf (stderr, "Input truncated or corrupt\n"); + ret = RET_ERROR_DECOMPRESSION; + } + + lzma_end (&strm); + return ret; +} + +int main () +{ + int ret; + + ret = xz_decompress (stdin, stdout); + return ret; +} + diff --git a/usr/share/doc/xz/faq.txt b/usr/share/doc/xz/faq.txt new file mode 100644 index 0000000..333bee0 --- /dev/null +++ b/usr/share/doc/xz/faq.txt @@ -0,0 +1,224 @@ + +XZ Utils FAQ +============ + +Q: What do the letters XZ mean? + +A: Nothing. They are just two letters, which come from the file format + suffix .xz. The .xz suffix was selected, because it seemed to be + pretty much unused. It has no deeper meaning. + + +Q: What are LZMA and LZMA2? + +A: LZMA stands for Lempel-Ziv-Markov chain-Algorithm. It is the name + of the compression algorithm designed by Igor Pavlov for 7-Zip. + LZMA is based on LZ77 and range encoding. + + LZMA2 is an updated version of the original LZMA to fix a couple of + practical issues. In context of XZ Utils, LZMA is called LZMA1 to + emphasize that LZMA is not the same thing as LZMA2. LZMA2 is the + primary compression algorithm in the .xz file format. + + +Q: There are many LZMA related projects. How does XZ Utils relate to them? + +A: 7-Zip and LZMA SDK are the original projects. LZMA SDK is roughly + a subset of the 7-Zip source tree. + + p7zip is 7-Zip's command-line tools ported to POSIX-like systems. + + LZMA Utils provide a gzip-like lzma tool for POSIX-like systems. + LZMA Utils are based on LZMA SDK. XZ Utils are the successor to + LZMA Utils. + + There are several other projects using LZMA. Most are more or less + based on LZMA SDK. See . + + +Q: Why is liblzma named liblzma if its primary file format is .xz? + Shouldn't it be e.g. libxz? + +A: When the designing of the .xz format began, the idea was to replace + the .lzma format and use the same .lzma suffix. It would have been + quite OK to reuse the suffix when there were very few .lzma files + around. However, the old .lzma format became popular before the + new format was finished. The new format was renamed to .xz but the + name of liblzma wasn't changed. + + +Q: Do XZ Utils support the .7z format? + +A: No. Use 7-Zip (Windows) or p7zip (POSIX-like systems) to handle .7z + files. + + +Q: I have many .tar.7z files. Can I convert them to .tar.xz without + spending hours recompressing the data? + +A: In the "extra" directory, there is a script named 7z2lzma.bash which + is able to convert some .7z files to the .lzma format (not .xz). It + needs the 7za (or 7z) command from p7zip. The script may silently + produce corrupt output if certain assumptions are not met, so + decompress the resulting .lzma file and compare it against the + original before deleting the original file! + + +Q: I have many .lzma files. Can I quickly convert them to the .xz format? + +A: For now, no. Since XZ Utils supports the .lzma format, it's usually + not too bad to keep the old files in the old format. If you want to + do the conversion anyway, you need to decompress the .lzma files and + then recompress to the .xz format. + + Technically, there is a way to make the conversion relatively fast + (roughly twice the time that normal decompression takes). Writing + such a tool would take quite a bit of time though, and would probably + be useful to only a few people. If you really want such a conversion + tool, contact Lasse Collin and offer some money. + + +Q: I have installed xz, but my tar doesn't recognize .tar.xz files. + How can I extract .tar.xz files? + +A: xz -dc foo.tar.xz | tar xf - + + +Q: Can I recover parts of a broken .xz file (e.g. a corrupted CD-R)? + +A: It may be possible if the file consists of multiple blocks, which + typically is not the case if the file was created in single-threaded + mode. There is no recovery program yet. + + +Q: Is (some part of) XZ Utils patented? + +A: Lasse Collin is not aware of any patents that could affect XZ Utils. + However, due to the nature of software patents, it's not possible to + guarantee that XZ Utils isn't affected by any third party patent(s). + + +Q: Where can I find documentation about the file format and algorithms? + +A: The .xz format is documented in xz-file-format.txt. It is a container + format only, and doesn't include descriptions of any non-trivial + filters. + + Documenting LZMA and LZMA2 is planned, but for now, there is no other + documentation than the source code. Before you begin, you should know + the basics of LZ77 and range-coding algorithms. LZMA is based on LZ77, + but LZMA is a lot more complex. Range coding is used to compress + the final bitstream like Huffman coding is used in Deflate. + + +Q: I cannot find BCJ and BCJ2 filters. Don't they exist in liblzma? + +A: BCJ filter is called "x86" in liblzma. BCJ2 is not included, + because it requires using more than one encoded output stream. + A streamable version of BCJ2-style filtering is planned. + + +Q: I need to use a script that runs "xz -9". On a system with 256 MiB + of RAM, xz says that it cannot allocate memory. Can I make the + script work without modifying it? + +A: Set a default memory usage limit for compression. You can do it e.g. + in a shell initialization script such as ~/.bashrc or /etc/profile: + + XZ_DEFAULTS=--memlimit-compress=150MiB + export XZ_DEFAULTS + + xz will then scale the compression settings down so that the given + memory usage limit is not reached. This way xz shouldn't run out + of memory. + + Check also that memory-related resource limits are high enough. + On most systems, "ulimit -a" will show the current resource limits. + + +Q: How do I create files that can be decompressed with XZ Embedded? + +A: See the documentation in XZ Embedded. In short, something like + this is a good start: + + xz --check=crc32 --lzma2=preset=6e,dict=64KiB + + Or if a BCJ filter is needed too, e.g. if compressing + a kernel image for PowerPC: + + xz --check=crc32 --powerpc --lzma2=preset=6e,dict=64KiB + + Adjust the dictionary size to get a good compromise between + compression ratio and decompressor memory usage. Note that + in single-call decompression mode of XZ Embedded, a big + dictionary doesn't increase memory usage. + + +Q: Will xz support threaded compression? + +A: It is planned and has been taken into account when designing + the .xz file format. Eventually there will probably be three types + of threading, each method having its own advantages and disadvantages. + + The simplest method is splitting the uncompressed data into blocks + and compressing them in parallel independent from each other. + Since the blocks are compressed independently, they can also be + decompressed independently. Together with the index feature in .xz, + this allows using threads to create .xz files for random-access + reading. This also makes threaded decompression possible, although + it is not clear if threaded decompression will ever be implemented. + + The independent blocks method has a couple of disadvantages too. It + will compress worse than a single-block method. Often the difference + is not too big (maybe 1-2 %) but sometimes it can be too big. Also, + the memory usage of the compressor increases linearly when adding + threads. + + Match finder parallelization is another threading method. It has + been in 7-Zip for ages. It doesn't affect compression ratio or + memory usage significantly. Among the three threading methods, only + this is useful when compressing small files (files that are not + significantly bigger than the dictionary). Unfortunately this method + scales only to about two CPU cores. + + The third method is pigz-style threading (I use that name, because + pigz uses that method). It doesn't + affect compression ratio significantly and scales to many cores. + The memory usage scales linearly when threads are added. This isn't + significant with pigz, because Deflate uses only a 32 KiB dictionary, + but with LZMA2 the memory usage will increase dramatically just like + with the independent-blocks method. There is also a constant + computational overhead, which may make pigz-method a bit dull on + dual-core compared to the parallel match finder method, but with more + cores the overhead is not a big deal anymore. + + Combining the threading methods will be possible and also useful. + E.g. combining match finder parallelization with pigz-style threading + can cut the memory usage by 50 %. + + It is possible that the single-threaded method will be modified to + create files identical to the pigz-style method. We'll see once + pigz-style threading has been implemented in liblzma. + + +Q: How do I build a program that needs liblzmadec (lzmadec.h)? + +A: liblzmadec is part of LZMA Utils. XZ Utils has liblzma, but no + liblzmadec. The code using liblzmadec should be ported to use + liblzma instead. If you cannot or don't want to do that, download + LZMA Utils from . + + +Q: The default build of liblzma is too big. How can I make it smaller? + +A: Give --enable-small to the configure script. Use also appropriate + --enable or --disable options to include only those filter encoders + and decoders and integrity checks that you actually need. Use + CFLAGS=-Os (with GCC) or equivalent to tell your compiler to optimize + for size. See INSTALL for information about configure options. + + If the result is still too big, take a look at XZ Embedded. It is + a separate project, which provides a limited but significantly + smaller XZ decoder implementation than XZ Utils. You can find it + at . + diff --git a/usr/share/doc/xz/history.txt b/usr/share/doc/xz/history.txt new file mode 100644 index 0000000..8545e23 --- /dev/null +++ b/usr/share/doc/xz/history.txt @@ -0,0 +1,150 @@ + +History of LZMA Utils and XZ Utils +================================== + +Tukaani distribution + + In 2005, there was a small group working on the Tukaani distribution, + which was a Slackware fork. One of the project's goals was to fit the + distro on a single 700 MiB ISO-9660 image. Using LZMA instead of gzip + helped a lot. Roughly speaking, one could fit data that took 1000 MiB + in gzipped form into 700 MiB with LZMA. Naturally, the compression + ratio varied across packages, but this was what we got on average. + + Slackware packages have traditionally had .tgz as the filename suffix, + which is an abbreviation of .tar.gz. A logical naming for LZMA + compressed packages was .tlz, being an abbreviation of .tar.lzma. + + At the end of the year 2007, there was no distribution under the + Tukaani project anymore, but development of LZMA Utils was kept going. + Still, there were .tlz packages around, because at least Vector Linux + (a Slackware based distribution) used LZMA for its packages. + + First versions of the modified pkgtools used the LZMA_Alone tool from + Igor Pavlov's LZMA SDK as is. It was fine, because users wouldn't need + to interact with LZMA_Alone directly. But people soon wanted to use + LZMA for other files too, and the interface of LZMA_Alone wasn't + comfortable for those used to gzip and bzip2. + + +First steps of LZMA Utils + + The first version of LZMA Utils (4.22.0) included a shell script called + lzmash. It was a wrapper that had a gzip-like command-line interface. It + used the LZMA_Alone tool from LZMA SDK to do all the real work. zgrep, + zdiff, and related scripts from gzip were adapted to work with LZMA and + were part of the first LZMA Utils release too. + + LZMA Utils 4.22.0 included also lzmadec, which was a small (less than + 10 KiB) decoder-only command-line tool. It was written on top of the + decoder-only C code found from the LZMA SDK. lzmadec was convenient in + situations where LZMA_Alone (a few hundred KiB) would be too big. + + lzmash and lzmadec were written by Lasse Collin. + + +Second generation + + The lzmash script was an ugly and not very secure hack. The last + version of LZMA Utils to use lzmash was 4.27.1. + + LZMA Utils 4.32.0beta1 introduced a new lzma command-line tool written + by Ville Koskinen. It was written in C++, and used the encoder and + decoder from C++ LZMA SDK with some little modifications. This tool + replaced both the lzmash script and the LZMA_Alone command-line tool + in LZMA Utils. + + Introducing this new tool caused some temporary incompatibilities, + because the LZMA_Alone executable was simply named lzma like the new + command-line tool, but they had a completely different command-line + interface. The file format was still the same. + + Lasse wrote liblzmadec, which was a small decoder-only library based + on the C code found from LZMA SDK. liblzmadec had an API similar to + zlib, although there were some significant differences, which made it + non-trivial to use it in some applications designed for zlib and + libbzip2. + + The lzmadec command-line tool was converted to use liblzmadec. + + Alexandre Sauvé helped converting the build system to use GNU + Autotools. This made it easier to test for certain less portable + features needed by the new command-line tool. + + Since the new command-line tool never got completely finished (for + example, it didn't support the LZMA_OPT environment variable), the + intent was to not call 4.32.x stable. Similarly, liblzmadec wasn't + polished, but appeared to work well enough, so some people started + using it too. + + Because the development of the third generation of LZMA Utils was + delayed considerably (3-4 years), the 4.32.x branch had to be kept + maintained. It got some bug fixes now and then, and finally it was + decided to call it stable, although most of the missing features were + never added. + + +File format problems + + The file format used by LZMA_Alone was primitive. It was designed with + embedded systems in mind, and thus provided only a minimal set of + features. The two biggest problems for non-embedded use were the lack + of magic bytes and an integrity check. + + Igor and Lasse started developing a new file format with some help + from Ville Koskinen. Also Mark Adler, Mikko Pouru, H. Peter Anvin, + and Lars Wirzenius helped with some minor things at some point of the + development. Designing the new format took quite a long time (actually, + too long a time would be a more appropriate expression). It was mostly + because Lasse was quite slow at getting things done due to personal + reasons. + + Originally the new format was supposed to use the same .lzma suffix + that was already used by the old file format. Switching to the new + format wouldn't have caused much trouble when the old format wasn't + used by many people. But since the development of the new format took + such a long time, the old format got quite popular, and it was decided + that the new file format must use a different suffix. + + It was decided to use .xz as the suffix of the new file format. The + first stable .xz file format specification was finally released in + December 2008. In addition to fixing the most obvious problems of + the old .lzma format, the .xz format added some new features like + support for multiple filters (compression algorithms), filter chaining + (like piping on the command line), and limited random-access reading. + + Currently the primary compression algorithm used in .xz is LZMA2. + It is an extension on top of the original LZMA to fix some practical + problems: LZMA2 adds support for flushing the encoder, uncompressed + chunks, eases stateful decoder implementations, and improves support + for multithreading. Since LZMA2 is better than the original LZMA, the + original LZMA is not supported in .xz. + + +Transition to XZ Utils + + The early versions of XZ Utils were called LZMA Utils. The first + releases were 4.42.0alphas. They dropped the rest of the C++ LZMA SDK. + The code was still directly based on LZMA SDK but ported to C and + converted from a callback API to a stateful API. Later, Igor Pavlov + made a C version of the LZMA encoder too; these ports from C++ to C + were independent in LZMA SDK and LZMA Utils. + + The core of the new LZMA Utils was liblzma, a compression library with + a zlib-like API. liblzma supported both the old and new file format. + The gzip-like lzma command-line tool was rewritten to use liblzma. + + The new LZMA Utils code base was renamed to XZ Utils when the name + of the new file format had been decided. The liblzma compression + library retained its name though, because changing it would have + caused unnecessary breakage in applications already using the early + liblzma snapshots. + + The xz command-line tool can emulate the gzip-like lzma tool by + creating appropriate symlinks (e.g. lzma -> xz). Thus, practically + all scripts using the lzma tool from LZMA Utils will work as is with + XZ Utils (and will keep using the old .lzma format). Still, the .lzma + format is more or less deprecated. XZ Utils will keep supporting it, + but new applications should use the .xz format, and migrating old + applications to .xz is often a good idea too. + diff --git a/usr/share/doc/xz/lzma-file-format.txt b/usr/share/doc/xz/lzma-file-format.txt new file mode 100644 index 0000000..015b0fa --- /dev/null +++ b/usr/share/doc/xz/lzma-file-format.txt @@ -0,0 +1,166 @@ + +The .lzma File Format +===================== + + 0. Preface + 0.1. Notices and Acknowledgements + 0.2. Changes + 1. File Format + 1.1. Header + 1.1.1. Properties + 1.1.2. Dictionary Size + 1.1.3. Uncompressed Size + 1.2. LZMA Compressed Data + 2. References + + +0. Preface + + This document describes the .lzma file format, which is + sometimes also called LZMA_Alone format. It is a legacy file + format, which is being or has been replaced by the .xz format. + The MIME type of the .lzma format is `application/x-lzma'. + + The most commonly used software to handle .lzma files are + LZMA SDK, LZMA Utils, 7-Zip, and XZ Utils. This document + describes some of the differences between these implementations + and gives hints what subset of the .lzma format is the most + portable. + + +0.1. Notices and Acknowledgements + + This file format was designed by Igor Pavlov for use in + LZMA SDK. This document was written by Lasse Collin + using the documentation found + from the LZMA SDK. + + This document has been put into the public domain. + + +0.2. Changes + + Last modified: 2011-04-12 11:55+0300 + + +1. File Format + + +-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+ + | Header | LZMA Compressed Data | + +-+-+-+-+-+-+-+-+-+-+-+-+-+==========================+ + + The .lzma format file consist of 13-byte Header followed by + the LZMA Compressed Data. + + Unlike the .gz, .bz2, and .xz formats, it is not possible to + concatenate multiple .lzma files as is and expect the + decompression tool to decode the resulting file as if it were + a single .lzma file. + + For example, the command line tools from LZMA Utils and + LZMA SDK silently ignore all the data after the first .lzma + stream. In contrast, the command line tool from XZ Utils + considers the .lzma file to be corrupt if there is data after + the first .lzma stream. + + +1.1. Header + + +------------+----+----+----+----+--+--+--+--+--+--+--+--+ + | Properties | Dictionary Size | Uncompressed Size | + +------------+----+----+----+----+--+--+--+--+--+--+--+--+ + + +1.1.1. Properties + + The Properties field contains three properties. An abbreviation + is given in parentheses, followed by the value range of the + property. The field consists of + + 1) the number of literal context bits (lc, [0, 8]); + 2) the number of literal position bits (lp, [0, 4]); and + 3) the number of position bits (pb, [0, 4]). + + The properties are encoded using the following formula: + + Properties = (pb * 5 + lp) * 9 + lc + + The following C code illustrates a straightforward way to + decode the Properties field: + + uint8_t lc, lp, pb; + uint8_t prop = get_lzma_properties(); + if (prop > (4 * 5 + 4) * 9 + 8) + return LZMA_PROPERTIES_ERROR; + + pb = prop / (9 * 5); + prop -= pb * 9 * 5; + lp = prop / 9; + lc = prop - lp * 9; + + XZ Utils has an additional requirement: lc + lp <= 4. Files + which don't follow this requirement cannot be decompressed + with XZ Utils. Usually this isn't a problem since the most + common lc/lp/pb values are 3/0/2. It is the only lc/lp/pb + combination that the files created by LZMA Utils can have, + but LZMA Utils can decompress files with any lc/lp/pb. + + +1.1.2. Dictionary Size + + Dictionary Size is stored as an unsigned 32-bit little endian + integer. Any 32-bit value is possible, but for maximum + portability, only sizes of 2^n and 2^n + 2^(n-1) should be + used. + + LZMA Utils creates only files with dictionary size 2^n, + 16 <= n <= 25. LZMA Utils can decompress files with any + dictionary size. + + XZ Utils creates and decompresses .lzma files only with + dictionary sizes 2^n and 2^n + 2^(n-1). If some other + dictionary size is specified when compressing, the value + stored in the Dictionary Size field is a rounded up, but the + specified value is still used in the actual compression code. + + +1.1.3. Uncompressed Size + + Uncompressed Size is stored as unsigned 64-bit little endian + integer. A special value of 0xFFFF_FFFF_FFFF_FFFF indicates + that Uncompressed Size is unknown. End of Payload Marker (*) + is used if and only if Uncompressed Size is unknown. + + XZ Utils rejects files whose Uncompressed Size field specifies + a known size that is 256 GiB or more. This is to reject false + positives when trying to guess if the input file is in the + .lzma format. When Uncompressed Size is unknown, there is no + limit for the uncompressed size of the file. + + (*) Some tools use the term End of Stream (EOS) marker + instead of End of Payload Marker. + + +1.2. LZMA Compressed Data + + Detailed description of the format of this field is out of + scope of this document. + + +2. References + + LZMA SDK - The original LZMA implementation + http://7-zip.org/sdk.html + + 7-Zip + http://7-zip.org/ + + LZMA Utils - LZMA adapted to POSIX-like systems + http://tukaani.org/lzma/ + + XZ Utils - The next generation of LZMA Utils + http://tukaani.org/xz/ + + The .xz file format - The successor of the .lzma format + http://tukaani.org/xz/xz-file-format.txt + diff --git a/usr/share/doc/xz/xz-file-format.txt b/usr/share/doc/xz/xz-file-format.txt new file mode 100644 index 0000000..4ed6650 --- /dev/null +++ b/usr/share/doc/xz/xz-file-format.txt @@ -0,0 +1,1150 @@ + +The .xz File Format +=================== + +Version 1.0.4 (2009-08-27) + + + 0. Preface + 0.1. Notices and Acknowledgements + 0.2. Getting the Latest Version + 0.3. Version History + 1. Conventions + 1.1. Byte and Its Representation + 1.2. Multibyte Integers + 2. Overall Structure of .xz File + 2.1. Stream + 2.1.1. Stream Header + 2.1.1.1. Header Magic Bytes + 2.1.1.2. Stream Flags + 2.1.1.3. CRC32 + 2.1.2. Stream Footer + 2.1.2.1. CRC32 + 2.1.2.2. Backward Size + 2.1.2.3. Stream Flags + 2.1.2.4. Footer Magic Bytes + 2.2. Stream Padding + 3. Block + 3.1. Block Header + 3.1.1. Block Header Size + 3.1.2. Block Flags + 3.1.3. Compressed Size + 3.1.4. Uncompressed Size + 3.1.5. List of Filter Flags + 3.1.6. Header Padding + 3.1.7. CRC32 + 3.2. Compressed Data + 3.3. Block Padding + 3.4. Check + 4. Index + 4.1. Index Indicator + 4.2. Number of Records + 4.3. List of Records + 4.3.1. Unpadded Size + 4.3.2. Uncompressed Size + 4.4. Index Padding + 4.5. CRC32 + 5. Filter Chains + 5.1. Alignment + 5.2. Security + 5.3. Filters + 5.3.1. LZMA2 + 5.3.2. Branch/Call/Jump Filters for Executables + 5.3.3. Delta + 5.3.3.1. Format of the Encoded Output + 5.4. Custom Filter IDs + 5.4.1. Reserved Custom Filter ID Ranges + 6. Cyclic Redundancy Checks + 7. References + + +0. Preface + + This document describes the .xz file format (filename suffix + ".xz", MIME type "application/x-xz"). It is intended that this + this format replace the old .lzma format used by LZMA SDK and + LZMA Utils. + + +0.1. Notices and Acknowledgements + + This file format was designed by Lasse Collin + and Igor Pavlov. + + Special thanks for helping with this document goes to + Ville Koskinen. Thanks for helping with this document goes to + Mark Adler, H. Peter Anvin, Mikko Pouru, and Lars Wirzenius. + + This document has been put into the public domain. + + +0.2. Getting the Latest Version + + The latest official version of this document can be downloaded + from . + + Specific versions of this document have a filename + xz-file-format-X.Y.Z.txt where X.Y.Z is the version number. + For example, the version 1.0.0 of this document is available + at . + + +0.3. Version History + + Version Date Description + + 1.0.4 2009-08-27 Language improvements in Sections 1.2, + 2.1.1.2, 3.1.1, 3.1.2, and 5.3.1 + + 1.0.3 2009-06-05 Spelling fixes in Sections 5.1 and 5.4 + + 1.0.2 2009-06-04 Typo fixes in Sections 4 and 5.3.1 + + 1.0.1 2009-06-01 Typo fix in Section 0.3 and minor + clarifications to Sections 2, 2.2, + 3.3, 4.4, and 5.3.2 + + 1.0.0 2009-01-14 The first official version + + +1. Conventions + + The key words "MUST", "MUST NOT", "REQUIRED", "SHOULD", + "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this + document are to be interpreted as described in [RFC-2119]. + + Indicating a warning means displaying a message, returning + appropriate exit status, or doing something else to let the + user know that something worth warning occurred. The operation + SHOULD still finish if a warning is indicated. + + Indicating an error means displaying a message, returning + appropriate exit status, or doing something else to let the + user know that something prevented successfully finishing the + operation. The operation MUST be aborted once an error has + been indicated. + + +1.1. Byte and Its Representation + + In this document, byte is always 8 bits. + + A "null byte" has all bits unset. That is, the value of a null + byte is 0x00. + + To represent byte blocks, this document uses notation that + is similar to the notation used in [RFC-1952]: + + +-------+ + | Foo | One byte. + +-------+ + + +---+---+ + | Foo | Two bytes; that is, some of the vertical bars + +---+---+ can be missing. + + +=======+ + | Foo | Zero or more bytes. + +=======+ + + In this document, a boxed byte or a byte sequence declared + using this notation is called "a field". The example field + above would be called "the Foo field" or plain "Foo". + + If there are many fields, they may be split to multiple lines. + This is indicated with an arrow ("--->"): + + +=====+ + | Foo | + +=====+ + + +=====+ + ---> | Bar | + +=====+ + + The above is equivalent to this: + + +=====+=====+ + | Foo | Bar | + +=====+=====+ + + +1.2. Multibyte Integers + + Multibyte integers of static length, such as CRC values, + are stored in little endian byte order (least significant + byte first). + + When smaller values are more likely than bigger values (for + example file sizes), multibyte integers are encoded in a + variable-length representation: + - Numbers in the range [0, 127] are copied as is, and take + one byte of space. + - Bigger numbers will occupy two or more bytes. All but the + last byte of the multibyte representation have the highest + (eighth) bit set. + + For now, the value of the variable-length integers is limited + to 63 bits, which limits the encoded size of the integer to + nine bytes. These limits may be increased in the future if + needed. + + The following C code illustrates encoding and decoding of + variable-length integers. The functions return the number of + bytes occupied by the integer (1-9), or zero on error. + + #include + #include + + size_t + encode(uint8_t buf[static 9], uint64_t num) + { + if (num > UINT64_MAX / 2) + return 0; + + size_t i = 0; + + while (num >= 0x80) { + buf[i++] = (uint8_t)(num) | 0x80; + num >>= 7; + } + + buf[i++] = (uint8_t)(num); + + return i; + } + + size_t + decode(const uint8_t buf[], size_t size_max, uint64_t *num) + { + if (size_max == 0) + return 0; + + if (size_max > 9) + size_max = 9; + + *num = buf[0] & 0x7F; + size_t i = 0; + + while (buf[i++] & 0x80) { + if (i >= size_max || buf[i] == 0x00) + return 0; + + *num |= (uint64_t)(buf[i] & 0x7F) << (i * 7); + } + + return i; + } + + +2. Overall Structure of .xz File + + A standalone .xz files consist of one or more Streams which may + have Stream Padding between or after them: + + +========+================+========+================+ + | Stream | Stream Padding | Stream | Stream Padding | ... + +========+================+========+================+ + + The sizes of Stream and Stream Padding are always multiples + of four bytes, thus the size of every valid .xz file MUST be + a multiple of four bytes. + + While a typical file contains only one Stream and no Stream + Padding, a decoder handling standalone .xz files SHOULD support + files that have more than one Stream or Stream Padding. + + In contrast to standalone .xz files, when the .xz file format + is used as an internal part of some other file format or + communication protocol, it usually is expected that the decoder + stops after the first Stream, and doesn't look for Stream + Padding or possibly other Streams. + + +2.1. Stream + + +-+-+-+-+-+-+-+-+-+-+-+-+=======+=======+ +=======+ + | Stream Header | Block | Block | ... | Block | + +-+-+-+-+-+-+-+-+-+-+-+-+=======+=======+ +=======+ + + +=======+-+-+-+-+-+-+-+-+-+-+-+-+ + ---> | Index | Stream Footer | + +=======+-+-+-+-+-+-+-+-+-+-+-+-+ + + All the above fields have a size that is a multiple of four. If + Stream is used as an internal part of another file format, it + is RECOMMENDED to make the Stream start at an offset that is + a multiple of four bytes. + + Stream Header, Index, and Stream Footer are always present in + a Stream. The maximum size of the Index field is 16 GiB (2^34). + + There are zero or more Blocks. The maximum number of Blocks is + limited only by the maximum size of the Index field. + + Total size of a Stream MUST be less than 8 EiB (2^63 bytes). + The same limit applies to the total amount of uncompressed + data stored in a Stream. + + If an implementation supports handling .xz files with multiple + concatenated Streams, it MAY apply the above limits to the file + as a whole instead of limiting per Stream basis. + + +2.1.1. Stream Header + + +---+---+---+---+---+---+-------+------+--+--+--+--+ + | Header Magic Bytes | Stream Flags | CRC32 | + +---+---+---+---+---+---+-------+------+--+--+--+--+ + + +2.1.1.1. Header Magic Bytes + + The first six (6) bytes of the Stream are so called Header + Magic Bytes. They can be used to identify the file type. + + Using a C array and ASCII: + const uint8_t HEADER_MAGIC[6] + = { 0xFD, '7', 'z', 'X', 'Z', 0x00 }; + + In plain hexadecimal: + FD 37 7A 58 5A 00 + + Notes: + - The first byte (0xFD) was chosen so that the files cannot + be erroneously detected as being in .lzma format, in which + the first byte is in the range [0x00, 0xE0]. + - The sixth byte (0x00) was chosen to prevent applications + from misdetecting the file as a text file. + + If the Header Magic Bytes don't match, the decoder MUST + indicate an error. + + +2.1.1.2. Stream Flags + + The first byte of Stream Flags is always a null byte. In the + future, this byte may be used to indicate a new Stream version + or other Stream properties. + + The second byte of Stream Flags is a bit field: + + Bit(s) Mask Description + 0-3 0x0F Type of Check (see Section 3.4): + ID Size Check name + 0x00 0 bytes None + 0x01 4 bytes CRC32 + 0x02 4 bytes (Reserved) + 0x03 4 bytes (Reserved) + 0x04 8 bytes CRC64 + 0x05 8 bytes (Reserved) + 0x06 8 bytes (Reserved) + 0x07 16 bytes (Reserved) + 0x08 16 bytes (Reserved) + 0x09 16 bytes (Reserved) + 0x0A 32 bytes SHA-256 + 0x0B 32 bytes (Reserved) + 0x0C 32 bytes (Reserved) + 0x0D 64 bytes (Reserved) + 0x0E 64 bytes (Reserved) + 0x0F 64 bytes (Reserved) + 4-7 0xF0 Reserved for future use; MUST be zero for now. + + Implementations SHOULD support at least the Check IDs 0x00 + (None) and 0x01 (CRC32). Supporting other Check IDs is + OPTIONAL. If an unsupported Check is used, the decoder SHOULD + indicate a warning or error. + + If any reserved bit is set, the decoder MUST indicate an error. + It is possible that there is a new field present which the + decoder is not aware of, and can thus parse the Stream Header + incorrectly. + + +2.1.1.3. CRC32 + + The CRC32 is calculated from the Stream Flags field. It is + stored as an unsigned 32-bit little endian integer. If the + calculated value does not match the stored one, the decoder + MUST indicate an error. + + The idea is that Stream Flags would always be two bytes, even + if new features are needed. This way old decoders will be able + to verify the CRC32 calculated from Stream Flags, and thus + distinguish between corrupt files (CRC32 doesn't match) and + files that the decoder doesn't support (CRC32 matches but + Stream Flags has reserved bits set). + + +2.1.2. Stream Footer + + +-+-+-+-+---+---+---+---+-------+------+----------+---------+ + | CRC32 | Backward Size | Stream Flags | Footer Magic Bytes | + +-+-+-+-+---+---+---+---+-------+------+----------+---------+ + + +2.1.2.1. CRC32 + + The CRC32 is calculated from the Backward Size and Stream Flags + fields. It is stored as an unsigned 32-bit little endian + integer. If the calculated value does not match the stored one, + the decoder MUST indicate an error. + + The reason to have the CRC32 field before the Backward Size and + Stream Flags fields is to keep the four-byte fields aligned to + a multiple of four bytes. + + +2.1.2.2. Backward Size + + Backward Size is stored as a 32-bit little endian integer, + which indicates the size of the Index field as multiple of + four bytes, minimum value being four bytes: + + real_backward_size = (stored_backward_size + 1) * 4; + + If the stored value does not match the real size of the Index + field, the decoder MUST indicate an error. + + Using a fixed-size integer to store Backward Size makes + it slightly simpler to parse the Stream Footer when the + application needs to parse the Stream backwards. + + +2.1.2.3. Stream Flags + + This is a copy of the Stream Flags field from the Stream + Header. The information stored to Stream Flags is needed + when parsing the Stream backwards. The decoder MUST compare + the Stream Flags fields in both Stream Header and Stream + Footer, and indicate an error if they are not identical. + + +2.1.2.4. Footer Magic Bytes + + As the last step of the decoding process, the decoder MUST + verify the existence of Footer Magic Bytes. If they don't + match, an error MUST be indicated. + + Using a C array and ASCII: + const uint8_t FOOTER_MAGIC[2] = { 'Y', 'Z' }; + + In hexadecimal: + 59 5A + + The primary reason to have Footer Magic Bytes is to make + it easier to detect incomplete files quickly, without + uncompressing. If the file does not end with Footer Magic Bytes + (excluding Stream Padding described in Section 2.2), it cannot + be undamaged, unless someone has intentionally appended garbage + after the end of the Stream. + + +2.2. Stream Padding + + Only the decoders that support decoding of concatenated Streams + MUST support Stream Padding. + + Stream Padding MUST contain only null bytes. To preserve the + four-byte alignment of consecutive Streams, the size of Stream + Padding MUST be a multiple of four bytes. Empty Stream Padding + is allowed. If these requirements are not met, the decoder MUST + indicate an error. + + Note that non-empty Stream Padding is allowed at the end of the + file; there doesn't need to be a new Stream after non-empty + Stream Padding. This can be convenient in certain situations + [GNU-tar]. + + The possibility of Stream Padding MUST be taken into account + when designing an application that parses Streams backwards, + and the application supports concatenated Streams. + + +3. Block + + +==============+=================+===============+=======+ + | Block Header | Compressed Data | Block Padding | Check | + +==============+=================+===============+=======+ + + +3.1. Block Header + + +-------------------+-------------+=================+ + | Block Header Size | Block Flags | Compressed Size | + +-------------------+-------------+=================+ + + +===================+======================+ + ---> | Uncompressed Size | List of Filter Flags | + +===================+======================+ + + +================+--+--+--+--+ + ---> | Header Padding | CRC32 | + +================+--+--+--+--+ + + +3.1.1. Block Header Size + + This field overlaps with the Index Indicator field (see + Section 4.1). + + This field contains the size of the Block Header field, + including the Block Header Size field itself. Valid values are + in the range [0x01, 0xFF], which indicate the size of the Block + Header as multiples of four bytes, minimum size being eight + bytes: + + real_header_size = (encoded_header_size + 1) * 4; + + If a Block Header bigger than 1024 bytes is needed in the + future, a new field can be added between the Block Header and + Compressed Data fields. The presence of this new field would + be indicated in the Block Header field. + + +3.1.2. Block Flags + + The Block Flags field is a bit field: + + Bit(s) Mask Description + 0-1 0x03 Number of filters (1-4) + 2-5 0x3C Reserved for future use; MUST be zero for now. + 6 0x40 The Compressed Size field is present. + 7 0x80 The Uncompressed Size field is present. + + If any reserved bit is set, the decoder MUST indicate an error. + It is possible that there is a new field present which the + decoder is not aware of, and can thus parse the Block Header + incorrectly. + + +3.1.3. Compressed Size + + This field is present only if the appropriate bit is set in + the Block Flags field (see Section 3.1.2). + + The Compressed Size field contains the size of the Compressed + Data field, which MUST be non-zero. Compressed Size is stored + using the encoding described in Section 1.2. If the Compressed + Size doesn't match the size of the Compressed Data field, the + decoder MUST indicate an error. + + +3.1.4. Uncompressed Size + + This field is present only if the appropriate bit is set in + the Block Flags field (see Section 3.1.2). + + The Uncompressed Size field contains the size of the Block + after uncompressing. Uncompressed Size is stored using the + encoding described in Section 1.2. If the Uncompressed Size + does not match the real uncompressed size, the decoder MUST + indicate an error. + + Storing the Compressed Size and Uncompressed Size fields serves + several purposes: + - The decoder knows how much memory it needs to allocate + for a temporary buffer in multithreaded mode. + - Simple error detection: wrong size indicates a broken file. + - Seeking forwards to a specific location in streamed mode. + + It should be noted that the only reliable way to determine + the real uncompressed size is to uncompress the Block, + because the Block Header and Index fields may contain + (intentionally or unintentionally) invalid information. + + +3.1.5. List of Filter Flags + + +================+================+ +================+ + | Filter 0 Flags | Filter 1 Flags | ... | Filter n Flags | + +================+================+ +================+ + + The number of Filter Flags fields is stored in the Block Flags + field (see Section 3.1.2). + + The format of each Filter Flags field is as follows: + + +===========+====================+===================+ + | Filter ID | Size of Properties | Filter Properties | + +===========+====================+===================+ + + Both Filter ID and Size of Properties are stored using the + encoding described in Section 1.2. Size of Properties indicates + the size of the Filter Properties field as bytes. The list of + officially defined Filter IDs and the formats of their Filter + Properties are described in Section 5.3. + + Filter IDs greater than or equal to 0x4000_0000_0000_0000 + (2^62) are reserved for implementation-specific internal use. + These Filter IDs MUST never be used in List of Filter Flags. + + +3.1.6. Header Padding + + This field contains as many null byte as it is needed to make + the Block Header have the size specified in Block Header Size. + If any of the bytes are not null bytes, the decoder MUST + indicate an error. It is possible that there is a new field + present which the decoder is not aware of, and can thus parse + the Block Header incorrectly. + + +3.1.7. CRC32 + + The CRC32 is calculated over everything in the Block Header + field except the CRC32 field itself. It is stored as an + unsigned 32-bit little endian integer. If the calculated + value does not match the stored one, the decoder MUST indicate + an error. + + By verifying the CRC32 of the Block Header before parsing the + actual contents allows the decoder to distinguish between + corrupt and unsupported files. + + +3.2. Compressed Data + + The format of Compressed Data depends on Block Flags and List + of Filter Flags. Excluding the descriptions of the simplest + filters in Section 5.3, the format of the filter-specific + encoded data is out of scope of this document. + + +3.3. Block Padding + + Block Padding MUST contain 0-3 null bytes to make the size of + the Block a multiple of four bytes. This can be needed when + the size of Compressed Data is not a multiple of four. If any + of the bytes in Block Padding are not null bytes, the decoder + MUST indicate an error. + + +3.4. Check + + The type and size of the Check field depends on which bits + are set in the Stream Flags field (see Section 2.1.1.2). + + The Check, when used, is calculated from the original + uncompressed data. If the calculated Check does not match the + stored one, the decoder MUST indicate an error. If the selected + type of Check is not supported by the decoder, it SHOULD + indicate a warning or error. + + +4. Index + + +-----------------+===================+ + | Index Indicator | Number of Records | + +-----------------+===================+ + + +=================+===============+-+-+-+-+ + ---> | List of Records | Index Padding | CRC32 | + +=================+===============+-+-+-+-+ + + Index serves several purposes. Using it, one can + - verify that all Blocks in a Stream have been processed; + - find out the uncompressed size of a Stream; and + - quickly access the beginning of any Block (random access). + + +4.1. Index Indicator + + This field overlaps with the Block Header Size field (see + Section 3.1.1). The value of Index Indicator is always 0x00. + + +4.2. Number of Records + + This field indicates how many Records there are in the List + of Records field, and thus how many Blocks there are in the + Stream. The value is stored using the encoding described in + Section 1.2. If the decoder has decoded all the Blocks of the + Stream, and then notices that the Number of Records doesn't + match the real number of Blocks, the decoder MUST indicate an + error. + + +4.3. List of Records + + List of Records consists of as many Records as indicated by the + Number of Records field: + + +========+========+ + | Record | Record | ... + +========+========+ + + Each Record contains information about one Block: + + +===============+===================+ + | Unpadded Size | Uncompressed Size | + +===============+===================+ + + If the decoder has decoded all the Blocks of the Stream, it + MUST verify that the contents of the Records match the real + Unpadded Size and Uncompressed Size of the respective Blocks. + + Implementation hint: It is possible to verify the Index with + constant memory usage by calculating for example SHA-256 of + both the real size values and the List of Records, then + comparing the hash values. Implementing this using + non-cryptographic hash like CRC32 SHOULD be avoided unless + small code size is important. + + If the decoder supports random-access reading, it MUST verify + that Unpadded Size and Uncompressed Size of every completely + decoded Block match the sizes stored in the Index. If only + partial Block is decoded, the decoder MUST verify that the + processed sizes don't exceed the sizes stored in the Index. + + +4.3.1. Unpadded Size + + This field indicates the size of the Block excluding the Block + Padding field. That is, Unpadded Size is the size of the Block + Header, Compressed Data, and Check fields. Unpadded Size is + stored using the encoding described in Section 1.2. The value + MUST never be zero; with the current structure of Blocks, the + actual minimum value for Unpadded Size is five. + + Implementation note: Because the size of the Block Padding + field is not included in Unpadded Size, calculating the total + size of a Stream or doing random-access reading requires + calculating the actual size of the Blocks by rounding Unpadded + Sizes up to the next multiple of four. + + The reason to exclude Block Padding from Unpadded Size is to + ease making a raw copy of Compressed Data without Block + Padding. This can be useful, for example, if someone wants + to convert Streams to some other file format quickly. + + +4.3.2. Uncompressed Size + + This field indicates the Uncompressed Size of the respective + Block as bytes. The value is stored using the encoding + described in Section 1.2. + + +4.4. Index Padding + + This field MUST contain 0-3 null bytes to pad the Index to + a multiple of four bytes. If any of the bytes are not null + bytes, the decoder MUST indicate an error. + + +4.5. CRC32 + + The CRC32 is calculated over everything in the Index field + except the CRC32 field itself. The CRC32 is stored as an + unsigned 32-bit little endian integer. If the calculated + value does not match the stored one, the decoder MUST indicate + an error. + + +5. Filter Chains + + The Block Flags field defines how many filters are used. When + more than one filter is used, the filters are chained; that is, + the output of one filter is the input of another filter. The + following figure illustrates the direction of data flow. + + v Uncompressed Data ^ + | Filter 0 | + Encoder | Filter 1 | Decoder + | Filter n | + v Compressed Data ^ + + +5.1. Alignment + + Alignment of uncompressed input data is usually the job of + the application producing the data. For example, to get the + best results, an archiver tool should make sure that all + PowerPC executable files in the archive stream start at + offsets that are multiples of four bytes. + + Some filters, for example LZMA2, can be configured to take + advantage of specified alignment of input data. Note that + taking advantage of aligned input can be beneficial also when + a filter is not the first filter in the chain. For example, + if you compress PowerPC executables, you may want to use the + PowerPC filter and chain that with the LZMA2 filter. Because + not only the input but also the output alignment of the PowerPC + filter is four bytes, it is now beneficial to set LZMA2 + settings so that the LZMA2 encoder can take advantage of its + four-byte-aligned input data. + + The output of the last filter in the chain is stored to the + Compressed Data field, which is is guaranteed to be aligned + to a multiple of four bytes relative to the beginning of the + Stream. This can increase + - speed, if the filtered data is handled multiple bytes at + a time by the filter-specific encoder and decoder, + because accessing aligned data in computer memory is + usually faster; and + - compression ratio, if the output data is later compressed + with an external compression tool. + + +5.2. Security + + If filters would be allowed to be chained freely, it would be + possible to create malicious files, that would be very slow to + decode. Such files could be used to create denial of service + attacks. + + Slow files could occur when multiple filters are chained: + + v Compressed input data + | Filter 1 decoder (last filter) + | Filter 0 decoder (non-last filter) + v Uncompressed output data + + The decoder of the last filter in the chain produces a lot of + output from little input. Another filter in the chain takes the + output of the last filter, and produces very little output + while consuming a lot of input. As a result, a lot of data is + moved inside the filter chain, but the filter chain as a whole + gets very little work done. + + To prevent this kind of slow files, there are restrictions on + how the filters can be chained. These restrictions MUST be + taken into account when designing new filters. + + The maximum number of filters in the chain has been limited to + four, thus there can be at maximum of three non-last filters. + Of these three non-last filters, only two are allowed to change + the size of the data. + + The non-last filters, that change the size of the data, MUST + have a limit how much the decoder can compress the data: the + decoder SHOULD produce at least n bytes of output when the + filter is given 2n bytes of input. This limit is not + absolute, but significant deviations MUST be avoided. + + The above limitations guarantee that if the last filter in the + chain produces 4n bytes of output, the chain as a whole will + produce at least n bytes of output. + + +5.3. Filters + +5.3.1. LZMA2 + + LZMA (Lempel-Ziv-Markov chain-Algorithm) is a general-purpose + compression algorithm with high compression ratio and fast + decompression. LZMA is based on LZ77 and range coding + algorithms. + + LZMA2 is an extension on top of the original LZMA. LZMA2 uses + LZMA internally, but adds support for flushing the encoder, + uncompressed chunks, eases stateful decoder implementations, + and improves support for multithreading. Thus, the plain LZMA + will not be supported in this file format. + + Filter ID: 0x21 + Size of Filter Properties: 1 byte + Changes size of data: Yes + Allow as a non-last filter: No + Allow as the last filter: Yes + + Preferred alignment: + Input data: Adjustable to 1/2/4/8/16 byte(s) + Output data: 1 byte + + The format of the one-byte Filter Properties field is as + follows: + + Bits Mask Description + 0-5 0x3F Dictionary Size + 6-7 0xC0 Reserved for future use; MUST be zero for now. + + Dictionary Size is encoded with one-bit mantissa and five-bit + exponent. The smallest dictionary size is 4 KiB and the biggest + is 4 GiB. + + Raw value Mantissa Exponent Dictionary size + 0 2 11 4 KiB + 1 3 11 6 KiB + 2 2 12 8 KiB + 3 3 12 12 KiB + 4 2 13 16 KiB + 5 3 13 24 KiB + 6 2 14 32 KiB + ... ... ... ... + 35 3 27 768 MiB + 36 2 28 1024 MiB + 37 3 29 1536 MiB + 38 2 30 2048 MiB + 39 3 30 3072 MiB + 40 2 31 4096 MiB - 1 B + + Instead of having a table in the decoder, the dictionary size + can be decoded using the following C code: + + const uint8_t bits = get_dictionary_flags() & 0x3F; + if (bits > 40) + return DICTIONARY_TOO_BIG; // Bigger than 4 GiB + + uint32_t dictionary_size; + if (bits == 40) { + dictionary_size = UINT32_MAX; + } else { + dictionary_size = 2 | (bits & 1); + dictionary_size <<= bits / 2 + 11; + } + + +5.3.2. Branch/Call/Jump Filters for Executables + + These filters convert relative branch, call, and jump + instructions to their absolute counterparts in executable + files. This conversion increases redundancy and thus + compression ratio. + + Size of Filter Properties: 0 or 4 bytes + Changes size of data: No + Allow as a non-last filter: Yes + Allow as the last filter: No + + Below is the list of filters in this category. The alignment + is the same for both input and output data. + + Filter ID Alignment Description + 0x04 1 byte x86 filter (BCJ) + 0x05 4 bytes PowerPC (big endian) filter + 0x06 16 bytes IA64 filter + 0x07 4 bytes ARM (little endian) filter + 0x08 2 bytes ARM Thumb (little endian) filter + 0x09 4 bytes SPARC filter + + If the size of Filter Properties is four bytes, the Filter + Properties field contains the start offset used for address + conversions. It is stored as an unsigned 32-bit little endian + integer. The start offset MUST be a multiple of the alignment + of the filter as listed in the table above; if it isn't, the + decoder MUST indicate an error. If the size of Filter + Properties is zero, the start offset is zero. + + Setting the start offset may be useful if an executable has + multiple sections, and there are many cross-section calls. + Taking advantage of this feature usually requires usage of + the Subblock filter, whose design is not complete yet. + + +5.3.3. Delta + + The Delta filter may increase compression ratio when the value + of the next byte correlates with the value of an earlier byte + at specified distance. + + Filter ID: 0x03 + Size of Filter Properties: 1 byte + Changes size of data: No + Allow as a non-last filter: Yes + Allow as the last filter: No + + Preferred alignment: + Input data: 1 byte + Output data: Same as the original input data + + The Properties byte indicates the delta distance, which can be + 1-256 bytes backwards from the current byte: 0x00 indicates + distance of 1 byte and 0xFF distance of 256 bytes. + + +5.3.3.1. Format of the Encoded Output + + The code below illustrates both encoding and decoding with + the Delta filter. + + // Distance is in the range [1, 256]. + const unsigned int distance = get_properties_byte() + 1; + uint8_t pos = 0; + uint8_t delta[256]; + + memset(delta, 0, sizeof(delta)); + + while (1) { + const int byte = read_byte(); + if (byte == EOF) + break; + + uint8_t tmp = delta[(uint8_t)(distance + pos)]; + if (is_encoder) { + tmp = (uint8_t)(byte) - tmp; + delta[pos] = (uint8_t)(byte); + } else { + tmp = (uint8_t)(byte) + tmp; + delta[pos] = tmp; + } + + write_byte(tmp); + --pos; + } + + +5.4. Custom Filter IDs + + If a developer wants to use custom Filter IDs, he has two + choices. The first choice is to contact Lasse Collin and ask + him to allocate a range of IDs for the developer. + + The second choice is to generate a 40-bit random integer, + which the developer can use as his personal Developer ID. + To minimize the risk of collisions, Developer ID has to be + a randomly generated integer, not manually selected "hex word". + The following command, which works on many free operating + systems, can be used to generate Developer ID: + + dd if=/dev/urandom bs=5 count=1 | hexdump + + The developer can then use his Developer ID to create unique + (well, hopefully unique) Filter IDs. + + Bits Mask Description + 0-15 0x0000_0000_0000_FFFF Filter ID + 16-55 0x00FF_FFFF_FFFF_0000 Developer ID + 56-62 0x3F00_0000_0000_0000 Static prefix: 0x3F + + The resulting 63-bit integer will use 9 bytes of space when + stored using the encoding described in Section 1.2. To get + a shorter ID, see the beginning of this Section how to + request a custom ID range. + + +5.4.1. Reserved Custom Filter ID Ranges + + Range Description + 0x0000_0300 - 0x0000_04FF Reserved to ease .7z compatibility + 0x0002_0000 - 0x0007_FFFF Reserved to ease .7z compatibility + 0x0200_0000 - 0x07FF_FFFF Reserved to ease .7z compatibility + + +6. Cyclic Redundancy Checks + + There are several incompatible variations to calculate CRC32 + and CRC64. For simplicity and clarity, complete examples are + provided to calculate the checks as they are used in this file + format. Implementations MAY use different code as long as it + gives identical results. + + The program below reads data from standard input, calculates + the CRC32 and CRC64 values, and prints the calculated values + as big endian hexadecimal strings to standard output. + + #include + #include + #include + + uint32_t crc32_table[256]; + uint64_t crc64_table[256]; + + void + init(void) + { + static const uint32_t poly32 = UINT32_C(0xEDB88320); + static const uint64_t poly64 + = UINT64_C(0xC96C5795D7870F42); + + for (size_t i = 0; i < 256; ++i) { + uint32_t crc32 = i; + uint64_t crc64 = i; + + for (size_t j = 0; j < 8; ++j) { + if (crc32 & 1) + crc32 = (crc32 >> 1) ^ poly32; + else + crc32 >>= 1; + + if (crc64 & 1) + crc64 = (crc64 >> 1) ^ poly64; + else + crc64 >>= 1; + } + + crc32_table[i] = crc32; + crc64_table[i] = crc64; + } + } + + uint32_t + crc32(const uint8_t *buf, size_t size, uint32_t crc) + { + crc = ~crc; + for (size_t i = 0; i < size; ++i) + crc = crc32_table[buf[i] ^ (crc & 0xFF)] + ^ (crc >> 8); + return ~crc; + } + + uint64_t + crc64(const uint8_t *buf, size_t size, uint64_t crc) + { + crc = ~crc; + for (size_t i = 0; i < size; ++i) + crc = crc64_table[buf[i] ^ (crc & 0xFF)] + ^ (crc >> 8); + return ~crc; + } + + int + main() + { + init(); + + uint32_t value32 = 0; + uint64_t value64 = 0; + uint64_t total_size = 0; + uint8_t buf[8192]; + + while (1) { + const size_t buf_size + = fread(buf, 1, sizeof(buf), stdin); + if (buf_size == 0) + break; + + total_size += buf_size; + value32 = crc32(buf, buf_size, value32); + value64 = crc64(buf, buf_size, value64); + } + + printf("Bytes: %" PRIu64 "\n", total_size); + printf("CRC-32: 0x%08" PRIX32 "\n", value32); + printf("CRC-64: 0x%016" PRIX64 "\n", value64); + + return 0; + } + + +7. References + + LZMA SDK - The original LZMA implementation + http://7-zip.org/sdk.html + + LZMA Utils - LZMA adapted to POSIX-like systems + http://tukaani.org/lzma/ + + XZ Utils - The next generation of LZMA Utils + http://tukaani.org/xz/ + + [RFC-1952] + GZIP file format specification version 4.3 + http://www.ietf.org/rfc/rfc1952.txt + - Notation of byte boxes in section "2.1. Overall conventions" + + [RFC-2119] + Key words for use in RFCs to Indicate Requirement Levels + http://www.ietf.org/rfc/rfc2119.txt + + [GNU-tar] + GNU tar 1.21 manual + http://www.gnu.org/software/tar/manual/html_node/Blocking-Factor.html + - Node 9.4.2 "Blocking Factor", paragraph that begins + "gzip will complain about trailing garbage" + - Note that this URL points to the latest version of the + manual, and may some day not contain the note which is in + 1.21. For the exact version of the manual, download GNU + tar 1.21: ftp://ftp.gnu.org/pub/gnu/tar/tar-1.21.tar.gz + diff --git a/usr/share/man/man1/lzcat.1 b/usr/share/man/man1/lzcat.1 new file mode 120000 index 0000000..01b6808 --- /dev/null +++ b/usr/share/man/man1/lzcat.1 @@ -0,0 +1 @@ +xz.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzcmp.1 b/usr/share/man/man1/lzcmp.1 new file mode 120000 index 0000000..e65fb97 --- /dev/null +++ b/usr/share/man/man1/lzcmp.1 @@ -0,0 +1 @@ +xzdiff.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzdiff.1 b/usr/share/man/man1/lzdiff.1 new file mode 120000 index 0000000..e65fb97 --- /dev/null +++ b/usr/share/man/man1/lzdiff.1 @@ -0,0 +1 @@ +xzdiff.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzegrep.1 b/usr/share/man/man1/lzegrep.1 new file mode 120000 index 0000000..bebd8a4 --- /dev/null +++ b/usr/share/man/man1/lzegrep.1 @@ -0,0 +1 @@ +xzgrep.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzfgrep.1 b/usr/share/man/man1/lzfgrep.1 new file mode 120000 index 0000000..bebd8a4 --- /dev/null +++ b/usr/share/man/man1/lzfgrep.1 @@ -0,0 +1 @@ +xzgrep.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzgrep.1 b/usr/share/man/man1/lzgrep.1 new file mode 120000 index 0000000..bebd8a4 --- /dev/null +++ b/usr/share/man/man1/lzgrep.1 @@ -0,0 +1 @@ +xzgrep.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzless.1 b/usr/share/man/man1/lzless.1 new file mode 120000 index 0000000..1b00a0a --- /dev/null +++ b/usr/share/man/man1/lzless.1 @@ -0,0 +1 @@ +xzless.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzma.1 b/usr/share/man/man1/lzma.1 new file mode 120000 index 0000000..01b6808 --- /dev/null +++ b/usr/share/man/man1/lzma.1 @@ -0,0 +1 @@ +xz.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzmadec.1 b/usr/share/man/man1/lzmadec.1 new file mode 120000 index 0000000..cd2223d --- /dev/null +++ b/usr/share/man/man1/lzmadec.1 @@ -0,0 +1 @@ +xzdec.1 \ No newline at end of file diff --git a/usr/share/man/man1/lzmainfo.1 b/usr/share/man/man1/lzmainfo.1 new file mode 100644 index 0000000..ce38eee --- /dev/null +++ b/usr/share/man/man1/lzmainfo.1 @@ -0,0 +1,60 @@ +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH LZMAINFO 1 "2013-06-30" "Tukaani" "XZ Utils" +.SH NAME +lzmainfo \- show information stored in the .lzma file header +.SH SYNOPSIS +.B lzmainfo +.RB [ \-\-help ] +.RB [ \-\-version ] +.RI [ file... ] +.SH DESCRIPTION +.B lzmainfo +shows information stored in the +.B .lzma +file header. +It reads the first 13 bytes from the specified +.IR file , +decodes the header, and prints it to standard output in human +readable format. +If no +.I files +are given or +.I file +is +.BR \- , +standard input is read. +.PP +Usually the most interesting information is +the uncompressed size and the dictionary size. +Uncompressed size can be shown only if +the file is in the non-streamed +.B .lzma +format variant. +The amount of memory required to decompress the file is +a few dozen kilobytes plus the dictionary size. +.PP +.B lzmainfo +is included in XZ Utils primarily for +backward compatibility with LZMA Utils. +.SH "EXIT STATUS" +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.SH BUGS +.B lzmainfo +uses +.B MB +while the correct suffix would be +.B MiB +(2^20 bytes). +This is to keep the output compatible with LZMA Utils. +.SH "SEE ALSO" +.BR xz (1) diff --git a/usr/share/man/man1/lzmore.1 b/usr/share/man/man1/lzmore.1 new file mode 120000 index 0000000..341082b --- /dev/null +++ b/usr/share/man/man1/lzmore.1 @@ -0,0 +1 @@ +xzmore.1 \ No newline at end of file diff --git a/usr/share/man/man1/unlzma.1 b/usr/share/man/man1/unlzma.1 new file mode 120000 index 0000000..01b6808 --- /dev/null +++ b/usr/share/man/man1/unlzma.1 @@ -0,0 +1 @@ +xz.1 \ No newline at end of file diff --git a/usr/share/man/man1/unxz.1 b/usr/share/man/man1/unxz.1 new file mode 120000 index 0000000..01b6808 --- /dev/null +++ b/usr/share/man/man1/unxz.1 @@ -0,0 +1 @@ +xz.1 \ No newline at end of file diff --git a/usr/share/man/man1/xz.1 b/usr/share/man/man1/xz.1 new file mode 100644 index 0000000..bc5514d --- /dev/null +++ b/usr/share/man/man1/xz.1 @@ -0,0 +1,2786 @@ +'\" t +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH XZ 1 "2015-05-11" "Tukaani" "XZ Utils" +. +.SH NAME +xz, unxz, xzcat, lzma, unlzma, lzcat \- Compress or decompress .xz and .lzma files +. +.SH SYNOPSIS +.B xz +.RI [ option... ] +.RI [ file... ] +. +.SH COMMAND ALIASES +.B unxz +is equivalent to +.BR "xz \-\-decompress" . +.br +.B xzcat +is equivalent to +.BR "xz \-\-decompress \-\-stdout" . +.br +.B lzma +is equivalent to +.BR "xz \-\-format=lzma" . +.br +.B unlzma +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress" . +.br +.B lzcat +is equivalent to +.BR "xz \-\-format=lzma \-\-decompress \-\-stdout" . +.PP +When writing scripts that need to decompress files, +it is recommended to always use the name +.B xz +with appropriate arguments +.RB ( "xz \-d" +or +.BR "xz \-dc" ) +instead of the names +.B unxz +and +.BR xzcat . +. +.SH DESCRIPTION +.B xz +is a general-purpose data compression tool with +command line syntax similar to +.BR gzip (1) +and +.BR bzip2 (1). +The native file format is the +.B .xz +format, but the legacy +.B .lzma +format used by LZMA Utils and +raw compressed streams with no container format headers +are also supported. +.PP +.B xz +compresses or decompresses each +.I file +according to the selected operation mode. +If no +.I files +are given or +.I file +is +.BR \- , +.B xz +reads from standard input and writes the processed data +to standard output. +.B xz +will refuse (display an error and skip the +.IR file ) +to write compressed data to standard output if it is a terminal. +Similarly, +.B xz +will refuse to read compressed data +from standard input if it is a terminal. +.PP +Unless +.B \-\-stdout +is specified, +.I files +other than +.B \- +are written to a new file whose name is derived from the source +.I file +name: +.IP \(bu 3 +When compressing, the suffix of the target file format +.RB ( .xz +or +.BR .lzma ) +is appended to the source filename to get the target filename. +.IP \(bu 3 +When decompressing, the +.B .xz +or +.B .lzma +suffix is removed from the filename to get the target filename. +.B xz +also recognizes the suffixes +.B .txz +and +.BR .tlz , +and replaces them with the +.B .tar +suffix. +.PP +If the target file already exists, an error is displayed and the +.I file +is skipped. +.PP +Unless writing to standard output, +.B xz +will display a warning and skip the +.I file +if any of the following applies: +.IP \(bu 3 +.I File +is not a regular file. +Symbolic links are not followed, +and thus they are not considered to be regular files. +.IP \(bu 3 +.I File +has more than one hard link. +.IP \(bu 3 +.I File +has setuid, setgid, or sticky bit set. +.IP \(bu 3 +The operation mode is set to compress and the +.I file +already has a suffix of the target file format +.RB ( .xz +or +.B .txz +when compressing to the +.B .xz +format, and +.B .lzma +or +.B .tlz +when compressing to the +.B .lzma +format). +.IP \(bu 3 +The operation mode is set to decompress and the +.I file +doesn't have a suffix of any of the supported file formats +.RB ( .xz , +.BR .txz , +.BR .lzma , +or +.BR .tlz ). +.PP +After successfully compressing or decompressing the +.IR file , +.B xz +copies the owner, group, permissions, access time, +and modification time from the source +.I file +to the target file. +If copying the group fails, the permissions are modified +so that the target file doesn't become accessible to users +who didn't have permission to access the source +.IR file . +.B xz +doesn't support copying other metadata like access control lists +or extended attributes yet. +.PP +Once the target file has been successfully closed, the source +.I file +is removed unless +.B \-\-keep +was specified. +The source +.I file +is never removed if the output is written to standard output. +.PP +Sending +.B SIGINFO +or +.B SIGUSR1 +to the +.B xz +process makes it print progress information to standard error. +This has only limited use since when standard error +is a terminal, using +.B \-\-verbose +will display an automatically updating progress indicator. +. +.SS "Memory usage" +The memory usage of +.B xz +varies from a few hundred kilobytes to several gigabytes +depending on the compression settings. +The settings used when compressing a file determine +the memory requirements of the decompressor. +Typically the decompressor needs 5\ % to 20\ % of +the amount of memory that the compressor needed when +creating the file. +For example, decompressing a file created with +.B xz \-9 +currently requires 65\ MiB of memory. +Still, it is possible to have +.B .xz +files that require several gigabytes of memory to decompress. +.PP +Especially users of older systems may find +the possibility of very large memory usage annoying. +To prevent uncomfortable surprises, +.B xz +has a built-in memory usage limiter, which is disabled by default. +While some operating systems provide ways to limit +the memory usage of processes, relying on it +wasn't deemed to be flexible enough (e.g. using +.BR ulimit (1) +to limit virtual memory tends to cripple +.BR mmap (2)). +.PP +The memory usage limiter can be enabled with +the command line option \fB\-\-memlimit=\fIlimit\fR. +Often it is more convenient to enable the limiter +by default by setting the environment variable +.BR XZ_DEFAULTS , +e.g.\& +.BR XZ_DEFAULTS=\-\-memlimit=150MiB . +It is possible to set the limits separately +for compression and decompression +by using \fB\-\-memlimit\-compress=\fIlimit\fR and +\fB\-\-memlimit\-decompress=\fIlimit\fR. +Using these two options outside +.B XZ_DEFAULTS +is rarely useful because a single run of +.B xz +cannot do both compression and decompression and +.BI \-\-memlimit= limit +(or \fB\-M\fR \fIlimit\fR) +is shorter to type on the command line. +.PP +If the specified memory usage limit is exceeded when decompressing, +.B xz +will display an error and decompressing the file will fail. +If the limit is exceeded when compressing, +.B xz +will try to scale the settings down so that the limit +is no longer exceeded (except when using \fB\-\-format=raw\fR +or \fB\-\-no\-adjust\fR). +This way the operation won't fail unless the limit is very small. +The scaling of the settings is done in steps that don't +match the compression level presets, e.g. if the limit is +only slightly less than the amount required for +.BR "xz \-9" , +the settings will be scaled down only a little, +not all the way down to +.BR "xz \-8" . +. +.SS "Concatenation and padding with .xz files" +It is possible to concatenate +.B .xz +files as is. +.B xz +will decompress such files as if they were a single +.B .xz +file. +.PP +It is possible to insert padding between the concatenated parts +or after the last part. +The padding must consist of null bytes and the size +of the padding must be a multiple of four bytes. +This can be useful e.g. if the +.B .xz +file is stored on a medium that measures file sizes +in 512-byte blocks. +.PP +Concatenation and padding are not allowed with +.B .lzma +files or raw streams. +. +.SH OPTIONS +. +.SS "Integer suffixes and special values" +In most places where an integer argument is expected, +an optional suffix is supported to easily indicate large integers. +There must be no space between the integer and the suffix. +.TP +.B KiB +Multiply the integer by 1,024 (2^10). +.BR Ki , +.BR k , +.BR kB , +.BR K , +and +.B KB +are accepted as synonyms for +.BR KiB . +.TP +.B MiB +Multiply the integer by 1,048,576 (2^20). +.BR Mi , +.BR m , +.BR M , +and +.B MB +are accepted as synonyms for +.BR MiB . +.TP +.B GiB +Multiply the integer by 1,073,741,824 (2^30). +.BR Gi , +.BR g , +.BR G , +and +.B GB +are accepted as synonyms for +.BR GiB . +.PP +The special value +.B max +can be used to indicate the maximum integer value +supported by the option. +. +.SS "Operation mode" +If multiple operation mode options are given, +the last one takes effect. +.TP +.BR \-z ", " \-\-compress +Compress. +This is the default operation mode when no operation mode option +is specified and no other operation mode is implied from +the command name (for example, +.B unxz +implies +.BR \-\-decompress ). +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Decompress. +.TP +.BR \-t ", " \-\-test +Test the integrity of compressed +.IR files . +This option is equivalent to +.B "\-\-decompress \-\-stdout" +except that the decompressed data is discarded instead of being +written to standard output. +No files are created or removed. +.TP +.BR \-l ", " \-\-list +Print information about compressed +.IR files . +No uncompressed output is produced, +and no files are created or removed. +In list mode, the program cannot read +the compressed data from standard +input or from other unseekable sources. +.IP "" +The default listing shows basic information about +.IR files , +one file per line. +To get more detailed information, use also the +.B \-\-verbose +option. +For even more information, use +.B \-\-verbose +twice, but note that this may be slow, because getting all the extra +information requires many seeks. +The width of verbose output exceeds +80 characters, so piping the output to e.g.\& +.B "less\ \-S" +may be convenient if the terminal isn't wide enough. +.IP "" +The exact output may vary between +.B xz +versions and different locales. +For machine-readable output, +.B \-\-robot \-\-list +should be used. +. +.SS "Operation modifiers" +.TP +.BR \-k ", " \-\-keep +Don't delete the input files. +.TP +.BR \-f ", " \-\-force +This option has several effects: +.RS +.IP \(bu 3 +If the target file already exists, +delete it before compressing or decompressing. +.IP \(bu 3 +Compress or decompress even if the input is +a symbolic link to a regular file, +has more than one hard link, +or has the setuid, setgid, or sticky bit set. +The setuid, setgid, and sticky bits are not copied +to the target file. +.IP \(bu 3 +When used with +.B \-\-decompress +.BR \-\-stdout +and +.B xz +cannot recognize the type of the source file, +copy the source file as is to standard output. +This allows +.B xzcat +.B \-\-force +to be used like +.BR cat (1) +for files that have not been compressed with +.BR xz . +Note that in future, +.B xz +might support new compressed file formats, which may make +.B xz +decompress more types of files instead of copying them as is to +standard output. +.BI \-\-format= format +can be used to restrict +.B xz +to decompress only a single file format. +.RE +.TP +.BR \-c ", " \-\-stdout ", " \-\-to\-stdout +Write the compressed or decompressed data to +standard output instead of a file. +This implies +.BR \-\-keep . +.TP +.B \-\-single\-stream +Decompress only the first +.B .xz +stream, and +silently ignore possible remaining input data following the stream. +Normally such trailing garbage makes +.B xz +display an error. +.IP "" +.B xz +never decompresses more than one stream from +.B .lzma +files or raw streams, but this option still makes +.B xz +ignore the possible trailing data after the +.B .lzma +file or raw stream. +.IP "" +This option has no effect if the operation mode is not +.B \-\-decompress +or +.BR \-\-test . +.TP +.B \-\-no\-sparse +Disable creation of sparse files. +By default, if decompressing into a regular file, +.B xz +tries to make the file sparse if the decompressed data contains +long sequences of binary zeros. +It also works when writing to standard output +as long as standard output is connected to a regular file +and certain additional conditions are met to make it safe. +Creating sparse files may save disk space and speed up +the decompression by reducing the amount of disk I/O. +.TP +\fB\-S\fR \fI.suf\fR, \fB\-\-suffix=\fI.suf +When compressing, use +.I .suf +as the suffix for the target file instead of +.B .xz +or +.BR .lzma . +If not writing to standard output and +the source file already has the suffix +.IR .suf , +a warning is displayed and the file is skipped. +.IP "" +When decompressing, recognize files with the suffix +.I .suf +in addition to files with the +.BR .xz , +.BR .txz , +.BR .lzma , +or +.B .tlz +suffix. +If the source file has the suffix +.IR .suf , +the suffix is removed to get the target filename. +.IP "" +When compressing or decompressing raw streams +.RB ( \-\-format=raw ), +the suffix must always be specified unless +writing to standard output, +because there is no default suffix for raw streams. +.TP +\fB\-\-files\fR[\fB=\fIfile\fR] +Read the filenames to process from +.IR file ; +if +.I file +is omitted, filenames are read from standard input. +Filenames must be terminated with the newline character. +A dash +.RB ( \- ) +is taken as a regular filename; it doesn't mean standard input. +If filenames are given also as command line arguments, they are +processed before the filenames read from +.IR file . +.TP +\fB\-\-files0\fR[\fB=\fIfile\fR] +This is identical to \fB\-\-files\fR[\fB=\fIfile\fR] except +that each filename must be terminated with the null character. +. +.SS "Basic file format and compression options" +.TP +\fB\-F\fR \fIformat\fR, \fB\-\-format=\fIformat +Specify the file +.I format +to compress or decompress: +.RS +.TP +.B auto +This is the default. +When compressing, +.B auto +is equivalent to +.BR xz . +When decompressing, +the format of the input file is automatically detected. +Note that raw streams (created with +.BR \-\-format=raw ) +cannot be auto-detected. +.TP +.B xz +Compress to the +.B .xz +file format, or accept only +.B .xz +files when decompressing. +.TP +.BR lzma ", " alone +Compress to the legacy +.B .lzma +file format, or accept only +.B .lzma +files when decompressing. +The alternative name +.B alone +is provided for backwards compatibility with LZMA Utils. +.TP +.B raw +Compress or uncompress a raw stream (no headers). +This is meant for advanced users only. +To decode raw streams, you need use +.B \-\-format=raw +and explicitly specify the filter chain, +which normally would have been stored in the container headers. +.RE +.TP +\fB\-C\fR \fIcheck\fR, \fB\-\-check=\fIcheck +Specify the type of the integrity check. +The check is calculated from the uncompressed data and +stored in the +.B .xz +file. +This option has an effect only when compressing into the +.B .xz +format; the +.B .lzma +format doesn't support integrity checks. +The integrity check (if any) is verified when the +.B .xz +file is decompressed. +.IP "" +Supported +.I check +types: +.RS +.TP +.B none +Don't calculate an integrity check at all. +This is usually a bad idea. +This can be useful when integrity of the data is verified +by other means anyway. +.TP +.B crc32 +Calculate CRC32 using the polynomial from IEEE-802.3 (Ethernet). +.TP +.B crc64 +Calculate CRC64 using the polynomial from ECMA-182. +This is the default, since it is slightly better than CRC32 +at detecting damaged files and the speed difference is negligible. +.TP +.B sha256 +Calculate SHA-256. +This is somewhat slower than CRC32 and CRC64. +.RE +.IP "" +Integrity of the +.B .xz +headers is always verified with CRC32. +It is not possible to change or disable it. +.TP +.B \-\-ignore\-check +Don't verify the integrity check of the compressed data when decompressing. +The CRC32 values in the +.B .xz +headers will still be verified normally. +.IP "" +.B "Do not use this option unless you know what you are doing." +Possible reasons to use this option: +.RS +.IP \(bu 3 +Trying to recover data from a corrupt .xz file. +.IP \(bu 3 +Speeding up decompression. +This matters mostly with SHA-256 or +with files that have compressed extremely well. +It's recommended to not use this option for this purpose +unless the file integrity is verified externally in some other way. +.RE +.TP +.BR \-0 " ... " \-9 +Select a compression preset level. +The default is +.BR \-6 . +If multiple preset levels are specified, +the last one takes effect. +If a custom filter chain was already specified, setting +a compression preset level clears the custom filter chain. +.IP "" +The differences between the presets are more significant than with +.BR gzip (1) +and +.BR bzip2 (1). +The selected compression settings determine +the memory requirements of the decompressor, +thus using a too high preset level might make it painful +to decompress the file on an old system with little RAM. +Specifically, +.B "it's not a good idea to blindly use \-9 for everything" +like it often is with +.BR gzip (1) +and +.BR bzip2 (1). +.RS +.TP +.BR "\-0" " ... " "\-3" +These are somewhat fast presets. +.B \-0 +is sometimes faster than +.B "gzip \-9" +while compressing much better. +The higher ones often have speed comparable to +.BR bzip2 (1) +with comparable or better compression ratio, +although the results +depend a lot on the type of data being compressed. +.TP +.BR "\-4" " ... " "\-6" +Good to very good compression while keeping +decompressor memory usage reasonable even for old systems. +.B \-6 +is the default, which is usually a good choice +e.g. for distributing files that need to be decompressible +even on systems with only 16\ MiB RAM. +.RB ( \-5e +or +.B \-6e +may be worth considering too. +See +.BR \-\-extreme .) +.TP +.B "\-7 ... \-9" +These are like +.B \-6 +but with higher compressor and decompressor memory requirements. +These are useful only when compressing files bigger than +8\ MiB, 16\ MiB, and 32\ MiB, respectively. +.RE +.IP "" +On the same hardware, the decompression speed is approximately +a constant number of bytes of compressed data per second. +In other words, the better the compression, +the faster the decompression will usually be. +This also means that the amount of uncompressed output +produced per second can vary a lot. +.IP "" +The following table summarises the features of the presets: +.RS +.RS +.PP +.TS +tab(;); +c c c c c +n n n n n. +Preset;DictSize;CompCPU;CompMem;DecMem +\-0;256 KiB;0;3 MiB;1 MiB +\-1;1 MiB;1;9 MiB;2 MiB +\-2;2 MiB;2;17 MiB;3 MiB +\-3;4 MiB;3;32 MiB;5 MiB +\-4;4 MiB;4;48 MiB;5 MiB +\-5;8 MiB;5;94 MiB;9 MiB +\-6;8 MiB;6;94 MiB;9 MiB +\-7;16 MiB;6;186 MiB;17 MiB +\-8;32 MiB;6;370 MiB;33 MiB +\-9;64 MiB;6;674 MiB;65 MiB +.TE +.RE +.RE +.IP "" +Column descriptions: +.RS +.IP \(bu 3 +DictSize is the LZMA2 dictionary size. +It is waste of memory to use a dictionary bigger than +the size of the uncompressed file. +This is why it is good to avoid using the presets +.BR \-7 " ... " \-9 +when there's no real need for them. +At +.B \-6 +and lower, the amount of memory wasted is +usually low enough to not matter. +.IP \(bu 3 +CompCPU is a simplified representation of the LZMA2 settings +that affect compression speed. +The dictionary size affects speed too, +so while CompCPU is the same for levels +.BR \-6 " ... " \-9 , +higher levels still tend to be a little slower. +To get even slower and thus possibly better compression, see +.BR \-\-extreme . +.IP \(bu 3 +CompMem contains the compressor memory requirements +in the single-threaded mode. +It may vary slightly between +.B xz +versions. +Memory requirements of some of the future multithreaded modes may +be dramatically higher than that of the single-threaded mode. +.IP \(bu 3 +DecMem contains the decompressor memory requirements. +That is, the compression settings determine +the memory requirements of the decompressor. +The exact decompressor memory usage is slightly more than +the LZMA2 dictionary size, but the values in the table +have been rounded up to the next full MiB. +.RE +.TP +.BR \-e ", " \-\-extreme +Use a slower variant of the selected compression preset level +.RB ( \-0 " ... " \-9 ) +to hopefully get a little bit better compression ratio, +but with bad luck this can also make it worse. +Decompressor memory usage is not affected, +but compressor memory usage increases a little at preset levels +.BR \-0 " ... " \-3 . +.IP "" +Since there are two presets with dictionary sizes +4\ MiB and 8\ MiB, the presets +.B \-3e +and +.B \-5e +use slightly faster settings (lower CompCPU) than +.B \-4e +and +.BR \-6e , +respectively. +That way no two presets are identical. +.RS +.RS +.PP +.TS +tab(;); +c c c c c +n n n n n. +Preset;DictSize;CompCPU;CompMem;DecMem +\-0e;256 KiB;8;4 MiB;1 MiB +\-1e;1 MiB;8;13 MiB;2 MiB +\-2e;2 MiB;8;25 MiB;3 MiB +\-3e;4 MiB;7;48 MiB;5 MiB +\-4e;4 MiB;8;48 MiB;5 MiB +\-5e;8 MiB;7;94 MiB;9 MiB +\-6e;8 MiB;8;94 MiB;9 MiB +\-7e;16 MiB;8;186 MiB;17 MiB +\-8e;32 MiB;8;370 MiB;33 MiB +\-9e;64 MiB;8;674 MiB;65 MiB +.TE +.RE +.RE +.IP "" +For example, there are a total of four presets that use +8\ MiB dictionary, whose order from the fastest to the slowest is +.BR \-5 , +.BR \-6 , +.BR \-5e , +and +.BR \-6e . +.TP +.B \-\-fast +.PD 0 +.TP +.B \-\-best +.PD +These are somewhat misleading aliases for +.B \-0 +and +.BR \-9 , +respectively. +These are provided only for backwards compatibility +with LZMA Utils. +Avoid using these options. +.TP +.BI \-\-block\-size= size +When compressing to the +.B .xz +format, split the input data into blocks of +.I size +bytes. +The blocks are compressed independently from each other, +which helps with multi-threading and +makes limited random-access decompression possible. +This option is typically used to override the default +block size in multi-threaded mode, +but this option can be used in single-threaded mode too. +.IP "" +In multi-threaded mode about three times +.I size +bytes will be allocated in each thread for buffering input and output. +The default +.I size +is three times the LZMA2 dictionary size or 1 MiB, +whichever is more. +Typically a good value is 2\-4 times +the size of the LZMA2 dictionary or at least 1 MiB. +Using +.I size +less than the LZMA2 dictionary size is waste of RAM +because then the LZMA2 dictionary buffer will never get fully used. +The sizes of the blocks are stored in the block headers, +which a future version of +.B xz +will use for multi-threaded decompression. +.IP "" +In single-threaded mode no block splitting is done by default. +Setting this option doesn't affect memory usage. +No size information is stored in block headers, +thus files created in single-threaded mode +won't be identical to files created in multi-threaded mode. +The lack of size information also means that a future version of +.B xz +won't be able decompress the files in multi-threaded mode. +.TP +.BI \-\-block\-list= sizes +When compressing to the +.B .xz +format, start a new block after +the given intervals of uncompressed data. +.IP "" +The uncompressed +.I sizes +of the blocks are specified as a comma-separated list. +Omitting a size (two or more consecutive commas) is a shorthand +to use the size of the previous block. +.IP "" +If the input file is bigger than the sum of +.IR sizes , +the last value in +.I sizes +is repeated until the end of the file. +A special value of +.B 0 +may be used as the last value to indicate that +the rest of the file should be encoded as a single block. +.IP "" +If one specifies +.I sizes +that exceed the encoder's block size +(either the default value in threaded mode or +the value specified with \fB\-\-block\-size=\fIsize\fR), +the encoder will create additional blocks while +keeping the boundaries specified in +.IR sizes . +For example, if one specifies +.B \-\-block\-size=10MiB +.B \-\-block\-list=5MiB,10MiB,8MiB,12MiB,24MiB +and the input file is 80 MiB, +one will get 11 blocks: +5, 10, 8, 10, 2, 10, 10, 4, 10, 10, and 1 MiB. +.IP "" +In multi-threaded mode the sizes of the blocks +are stored in the block headers. +This isn't done in single-threaded mode, +so the encoded output won't be +identical to that of the multi-threaded mode. +.TP +.BI \-\-flush\-timeout= timeout +When compressing, if more than +.I timeout +milliseconds (a positive integer) has passed since the previous flush and +reading more input would block, +all the pending input data is flushed from the encoder and +made available in the output stream. +This can be useful if +.B xz +is used to compress data that is streamed over a network. +Small +.I timeout +values make the data available at the receiving end +with a small delay, but large +.I timeout +values give better compression ratio. +.IP "" +This feature is disabled by default. +If this option is specified more than once, the last one takes effect. +The special +.I timeout +value of +.B 0 +can be used to explicitly disable this feature. +.IP "" +This feature is not available on non-POSIX systems. +.IP "" +.\" FIXME +.B "This feature is still experimental." +Currently +.B xz +is unsuitable for decompressing the stream in real time due to how +.B xz +does buffering. +.TP +.BI \-\-memlimit\-compress= limit +Set a memory usage limit for compression. +If this option is specified multiple times, +the last one takes effect. +.IP "" +If the compression settings exceed the +.IR limit , +.B xz +will adjust the settings downwards so that +the limit is no longer exceeded and display a notice that +automatic adjustment was done. +Such adjustments are not made when compressing with +.B \-\-format=raw +or if +.B \-\-no\-adjust +has been specified. +In those cases, an error is displayed and +.B xz +will exit with exit status 1. +.IP "" +The +.I limit +can be specified in multiple ways: +.RS +.IP \(bu 3 +The +.I limit +can be an absolute value in bytes. +Using an integer suffix like +.B MiB +can be useful. +Example: +.B "\-\-memlimit\-compress=80MiB" +.IP \(bu 3 +The +.I limit +can be specified as a percentage of total physical memory (RAM). +This can be useful especially when setting the +.B XZ_DEFAULTS +environment variable in a shell initialization script +that is shared between different computers. +That way the limit is automatically bigger +on systems with more memory. +Example: +.B "\-\-memlimit\-compress=70%" +.IP \(bu 3 +The +.I limit +can be reset back to its default value by setting it to +.BR 0 . +This is currently equivalent to setting the +.I limit +to +.B max +(no memory usage limit). +Once multithreading support has been implemented, +there may be a difference between +.B 0 +and +.B max +for the multithreaded case, so it is recommended to use +.B 0 +instead of +.B max +until the details have been decided. +.RE +.IP "" +See also the section +.BR "Memory usage" . +.TP +.BI \-\-memlimit\-decompress= limit +Set a memory usage limit for decompression. +This also affects the +.B \-\-list +mode. +If the operation is not possible without exceeding the +.IR limit , +.B xz +will display an error and decompressing the file will fail. +See +.BI \-\-memlimit\-compress= limit +for possible ways to specify the +.IR limit . +.TP +\fB\-M\fR \fIlimit\fR, \fB\-\-memlimit=\fIlimit\fR, \fB\-\-memory=\fIlimit +This is equivalent to specifying \fB\-\-memlimit\-compress=\fIlimit +\fB\-\-memlimit\-decompress=\fIlimit\fR. +.TP +.B \-\-no\-adjust +Display an error and exit if the compression settings exceed +the memory usage limit. +The default is to adjust the settings downwards so +that the memory usage limit is not exceeded. +Automatic adjusting is always disabled when creating raw streams +.RB ( \-\-format=raw ). +.TP +\fB\-T\fR \fIthreads\fR, \fB\-\-threads=\fIthreads +Specify the number of worker threads to use. +Setting +.I threads +to a special value +.B 0 +makes +.B xz +use as many threads as there are CPU cores on the system. +The actual number of threads can be less than +.I threads +if the input file is not big enough +for threading with the given settings or +if using more threads would exceed the memory usage limit. +.IP "" +Currently the only threading method is to split the input into +blocks and compress them independently from each other. +The default block size depends on the compression level and +can be overriden with the +.BI \-\-block\-size= size +option. +.IP "" +Threaded decompression hasn't been implemented yet. +It will only work on files that contain multiple blocks +with size information in block headers. +All files compressed in multi-threaded mode meet this condition, +but files compressed in single-threaded mode don't even if +.BI \-\-block\-size= size +is used. +. +.SS "Custom compressor filter chains" +A custom filter chain allows specifying +the compression settings in detail instead of relying on +the settings associated to the presets. +When a custom filter chain is specified, +preset options (\fB\-0\fR ... \fB\-9\fR and \fB\-\-extreme\fR) +earlier on the command line are forgotten. +If a preset option is specified +after one or more custom filter chain options, +the new preset takes effect and +the custom filter chain options specified earlier are forgotten. +.PP +A filter chain is comparable to piping on the command line. +When compressing, the uncompressed input goes to the first filter, +whose output goes to the next filter (if any). +The output of the last filter gets written to the compressed file. +The maximum number of filters in the chain is four, +but typically a filter chain has only one or two filters. +.PP +Many filters have limitations on where they can be +in the filter chain: +some filters can work only as the last filter in the chain, +some only as a non-last filter, and some work in any position +in the chain. +Depending on the filter, this limitation is either inherent to +the filter design or exists to prevent security issues. +.PP +A custom filter chain is specified by using one or more +filter options in the order they are wanted in the filter chain. +That is, the order of filter options is significant! +When decoding raw streams +.RB ( \-\-format=raw ), +the filter chain is specified in the same order as +it was specified when compressing. +.PP +Filters take filter-specific +.I options +as a comma-separated list. +Extra commas in +.I options +are ignored. +Every option has a default value, so you need to +specify only those you want to change. +.PP +To see the whole filter chain and +.IR options , +use +.B "xz \-vv" +(that is, use +.B \-\-verbose +twice). +This works also for viewing the filter chain options used by presets. +.TP +\fB\-\-lzma1\fR[\fB=\fIoptions\fR] +.PD 0 +.TP +\fB\-\-lzma2\fR[\fB=\fIoptions\fR] +.PD +Add LZMA1 or LZMA2 filter to the filter chain. +These filters can be used only as the last filter in the chain. +.IP "" +LZMA1 is a legacy filter, +which is supported almost solely due to the legacy +.B .lzma +file format, which supports only LZMA1. +LZMA2 is an updated +version of LZMA1 to fix some practical issues of LZMA1. +The +.B .xz +format uses LZMA2 and doesn't support LZMA1 at all. +Compression speed and ratios of LZMA1 and LZMA2 +are practically the same. +.IP "" +LZMA1 and LZMA2 share the same set of +.IR options : +.RS +.TP +.BI preset= preset +Reset all LZMA1 or LZMA2 +.I options +to +.IR preset . +.I Preset +consist of an integer, which may be followed by single-letter +preset modifiers. +The integer can be from +.B 0 +to +.BR 9 , +matching the command line options \fB\-0\fR ... \fB\-9\fR. +The only supported modifier is currently +.BR e , +which matches +.BR \-\-extreme . +If no +.B preset +is specified, the default values of LZMA1 or LZMA2 +.I options +are taken from the preset +.BR 6 . +.TP +.BI dict= size +Dictionary (history buffer) +.I size +indicates how many bytes of the recently processed +uncompressed data is kept in memory. +The algorithm tries to find repeating byte sequences (matches) in +the uncompressed data, and replace them with references +to the data currently in the dictionary. +The bigger the dictionary, the higher is the chance +to find a match. +Thus, increasing dictionary +.I size +usually improves compression ratio, but +a dictionary bigger than the uncompressed file is waste of memory. +.IP "" +Typical dictionary +.I size +is from 64\ KiB to 64\ MiB. +The minimum is 4\ KiB. +The maximum for compression is currently 1.5\ GiB (1536\ MiB). +The decompressor already supports dictionaries up to +one byte less than 4\ GiB, which is the maximum for +the LZMA1 and LZMA2 stream formats. +.IP "" +Dictionary +.I size +and match finder +.RI ( mf ) +together determine the memory usage of the LZMA1 or LZMA2 encoder. +The same (or bigger) dictionary +.I size +is required for decompressing that was used when compressing, +thus the memory usage of the decoder is determined +by the dictionary size used when compressing. +The +.B .xz +headers store the dictionary +.I size +either as +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)," +so these +.I sizes +are somewhat preferred for compression. +Other +.I sizes +will get rounded up when stored in the +.B .xz +headers. +.TP +.BI lc= lc +Specify the number of literal context bits. +The minimum is 0 and the maximum is 4; the default is 3. +In addition, the sum of +.I lc +and +.I lp +must not exceed 4. +.IP "" +All bytes that cannot be encoded as matches +are encoded as literals. +That is, literals are simply 8-bit bytes +that are encoded one at a time. +.IP "" +The literal coding makes an assumption that the highest +.I lc +bits of the previous uncompressed byte correlate +with the next byte. +E.g. in typical English text, an upper-case letter is +often followed by a lower-case letter, and a lower-case +letter is usually followed by another lower-case letter. +In the US-ASCII character set, the highest three bits are 010 +for upper-case letters and 011 for lower-case letters. +When +.I lc +is at least 3, the literal coding can take advantage of +this property in the uncompressed data. +.IP "" +The default value (3) is usually good. +If you want maximum compression, test +.BR lc=4 . +Sometimes it helps a little, and +sometimes it makes compression worse. +If it makes it worse, test e.g.\& +.B lc=2 +too. +.TP +.BI lp= lp +Specify the number of literal position bits. +The minimum is 0 and the maximum is 4; the default is 0. +.IP "" +.I Lp +affects what kind of alignment in the uncompressed data is +assumed when encoding literals. +See +.I pb +below for more information about alignment. +.TP +.BI pb= pb +Specify the number of position bits. +The minimum is 0 and the maximum is 4; the default is 2. +.IP "" +.I Pb +affects what kind of alignment in the uncompressed data is +assumed in general. +The default means four-byte alignment +.RI (2^ pb =2^2=4), +which is often a good choice when there's no better guess. +.IP "" +When the aligment is known, setting +.I pb +accordingly may reduce the file size a little. +E.g. with text files having one-byte +alignment (US-ASCII, ISO-8859-*, UTF-8), setting +.B pb=0 +can improve compression slightly. +For UTF-16 text, +.B pb=1 +is a good choice. +If the alignment is an odd number like 3 bytes, +.B pb=0 +might be the best choice. +.IP "" +Even though the assumed alignment can be adjusted with +.I pb +and +.IR lp , +LZMA1 and LZMA2 still slightly favor 16-byte alignment. +It might be worth taking into account when designing file formats +that are likely to be often compressed with LZMA1 or LZMA2. +.TP +.BI mf= mf +Match finder has a major effect on encoder speed, +memory usage, and compression ratio. +Usually Hash Chain match finders are faster than Binary Tree +match finders. +The default depends on the +.IR preset : +0 uses +.BR hc3 , +1\-3 +use +.BR hc4 , +and the rest use +.BR bt4 . +.IP "" +The following match finders are supported. +The memory usage formulas below are rough approximations, +which are closest to the reality when +.I dict +is a power of two. +.RS +.TP +.B hc3 +Hash Chain with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.br +.I dict +* 7.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 5.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B hc4 +Hash Chain with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.br +.I dict +* 7.5 (if +.I dict +<= 32 MiB); +.br +.I dict +* 6.5 (if +.I dict +> 32 MiB) +.TP +.B bt2 +Binary Tree with 2-byte hashing +.br +Minimum value for +.IR nice : +2 +.br +Memory usage: +.I dict +* 9.5 +.TP +.B bt3 +Binary Tree with 2- and 3-byte hashing +.br +Minimum value for +.IR nice : +3 +.br +Memory usage: +.br +.I dict +* 11.5 (if +.I dict +<= 16 MiB); +.br +.I dict +* 9.5 + 64 MiB (if +.I dict +> 16 MiB) +.TP +.B bt4 +Binary Tree with 2-, 3-, and 4-byte hashing +.br +Minimum value for +.IR nice : +4 +.br +Memory usage: +.br +.I dict +* 11.5 (if +.I dict +<= 32 MiB); +.br +.I dict +* 10.5 (if +.I dict +> 32 MiB) +.RE +.TP +.BI mode= mode +Compression +.I mode +specifies the method to analyze +the data produced by the match finder. +Supported +.I modes +are +.B fast +and +.BR normal . +The default is +.B fast +for +.I presets +0\-3 and +.B normal +for +.I presets +4\-9. +.IP "" +Usually +.B fast +is used with Hash Chain match finders and +.B normal +with Binary Tree match finders. +This is also what the +.I presets +do. +.TP +.BI nice= nice +Specify what is considered to be a nice length for a match. +Once a match of at least +.I nice +bytes is found, the algorithm stops +looking for possibly better matches. +.IP "" +.I Nice +can be 2\-273 bytes. +Higher values tend to give better compression ratio +at the expense of speed. +The default depends on the +.IR preset . +.TP +.BI depth= depth +Specify the maximum search depth in the match finder. +The default is the special value of 0, +which makes the compressor determine a reasonable +.I depth +from +.I mf +and +.IR nice . +.IP "" +Reasonable +.I depth +for Hash Chains is 4\-100 and 16\-1000 for Binary Trees. +Using very high values for +.I depth +can make the encoder extremely slow with some files. +Avoid setting the +.I depth +over 1000 unless you are prepared to interrupt +the compression in case it is taking far too long. +.RE +.IP "" +When decoding raw streams +.RB ( \-\-format=raw ), +LZMA2 needs only the dictionary +.IR size . +LZMA1 needs also +.IR lc , +.IR lp , +and +.IR pb . +.TP +\fB\-\-x86\fR[\fB=\fIoptions\fR] +.PD 0 +.TP +\fB\-\-powerpc\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-ia64\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-arm\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-armthumb\fR[\fB=\fIoptions\fR] +.TP +\fB\-\-sparc\fR[\fB=\fIoptions\fR] +.PD +Add a branch/call/jump (BCJ) filter to the filter chain. +These filters can be used only as a non-last filter +in the filter chain. +.IP "" +A BCJ filter converts relative addresses in +the machine code to their absolute counterparts. +This doesn't change the size of the data, +but it increases redundancy, +which can help LZMA2 to produce 0\-15\ % smaller +.B .xz +file. +The BCJ filters are always reversible, +so using a BCJ filter for wrong type of data +doesn't cause any data loss, although it may make +the compression ratio slightly worse. +.IP "" +It is fine to apply a BCJ filter on a whole executable; +there's no need to apply it only on the executable section. +Applying a BCJ filter on an archive that contains both executable +and non-executable files may or may not give good results, +so it generally isn't good to blindly apply a BCJ filter when +compressing binary packages for distribution. +.IP "" +These BCJ filters are very fast and +use insignificant amount of memory. +If a BCJ filter improves compression ratio of a file, +it can improve decompression speed at the same time. +This is because, on the same hardware, +the decompression speed of LZMA2 is roughly +a fixed number of bytes of compressed data per second. +.IP "" +These BCJ filters have known problems related to +the compression ratio: +.RS +.IP \(bu 3 +Some types of files containing executable code +(e.g. object files, static libraries, and Linux kernel modules) +have the addresses in the instructions filled with filler values. +These BCJ filters will still do the address conversion, +which will make the compression worse with these files. +.IP \(bu 3 +Applying a BCJ filter on an archive containing multiple similar +executables can make the compression ratio worse than not using +a BCJ filter. +This is because the BCJ filter doesn't detect the boundaries +of the executable files, and doesn't reset +the address conversion counter for each executable. +.RE +.IP "" +Both of the above problems will be fixed +in the future in a new filter. +The old BCJ filters will still be useful in embedded systems, +because the decoder of the new filter will be bigger +and use more memory. +.IP "" +Different instruction sets have have different alignment: +.RS +.RS +.PP +.TS +tab(;); +l n l +l n l. +Filter;Alignment;Notes +x86;1;32-bit or 64-bit x86 +PowerPC;4;Big endian only +ARM;4;Little endian only +ARM-Thumb;2;Little endian only +IA-64;16;Big or little endian +SPARC;4;Big or little endian +.TE +.RE +.RE +.IP "" +Since the BCJ-filtered data is usually compressed with LZMA2, +the compression ratio may be improved slightly if +the LZMA2 options are set to match the +alignment of the selected BCJ filter. +For example, with the IA-64 filter, it's good to set +.B pb=4 +with LZMA2 (2^4=16). +The x86 filter is an exception; +it's usually good to stick to LZMA2's default +four-byte alignment when compressing x86 executables. +.IP "" +All BCJ filters support the same +.IR options : +.RS +.TP +.BI start= offset +Specify the start +.I offset +that is used when converting between relative +and absolute addresses. +The +.I offset +must be a multiple of the alignment of the filter +(see the table above). +The default is zero. +In practice, the default is good; specifying a custom +.I offset +is almost never useful. +.RE +.TP +\fB\-\-delta\fR[\fB=\fIoptions\fR] +Add the Delta filter to the filter chain. +The Delta filter can be only used as a non-last filter +in the filter chain. +.IP "" +Currently only simple byte-wise delta calculation is supported. +It can be useful when compressing e.g. uncompressed bitmap images +or uncompressed PCM audio. +However, special purpose algorithms may give significantly better +results than Delta + LZMA2. +This is true especially with audio, +which compresses faster and better e.g. with +.BR flac (1). +.IP "" +Supported +.IR options : +.RS +.TP +.BI dist= distance +Specify the +.I distance +of the delta calculation in bytes. +.I distance +must be 1\-256. +The default is 1. +.IP "" +For example, with +.B dist=2 +and eight-byte input A1 B1 A2 B3 A3 B5 A4 B7, the output will be +A1 B1 01 02 01 02 01 02. +.RE +. +.SS "Other options" +.TP +.BR \-q ", " \-\-quiet +Suppress warnings and notices. +Specify this twice to suppress errors too. +This option has no effect on the exit status. +That is, even if a warning was suppressed, +the exit status to indicate a warning is still used. +.TP +.BR \-v ", " \-\-verbose +Be verbose. +If standard error is connected to a terminal, +.B xz +will display a progress indicator. +Specifying +.B \-\-verbose +twice will give even more verbose output. +.IP "" +The progress indicator shows the following information: +.RS +.IP \(bu 3 +Completion percentage is shown +if the size of the input file is known. +That is, the percentage cannot be shown in pipes. +.IP \(bu 3 +Amount of compressed data produced (compressing) +or consumed (decompressing). +.IP \(bu 3 +Amount of uncompressed data consumed (compressing) +or produced (decompressing). +.IP \(bu 3 +Compression ratio, which is calculated by dividing +the amount of compressed data processed so far by +the amount of uncompressed data processed so far. +.IP \(bu 3 +Compression or decompression speed. +This is measured as the amount of uncompressed data consumed +(compression) or produced (decompression) per second. +It is shown after a few seconds have passed since +.B xz +started processing the file. +.IP \(bu 3 +Elapsed time in the format M:SS or H:MM:SS. +.IP \(bu 3 +Estimated remaining time is shown +only when the size of the input file is +known and a couple of seconds have already passed since +.B xz +started processing the file. +The time is shown in a less precise format which +never has any colons, e.g. 2 min 30 s. +.RE +.IP "" +When standard error is not a terminal, +.B \-\-verbose +will make +.B xz +print the filename, compressed size, uncompressed size, +compression ratio, and possibly also the speed and elapsed time +on a single line to standard error after compressing or +decompressing the file. +The speed and elapsed time are included only when +the operation took at least a few seconds. +If the operation didn't finish, e.g. due to user interruption, +also the completion percentage is printed +if the size of the input file is known. +.TP +.BR \-Q ", " \-\-no\-warn +Don't set the exit status to 2 +even if a condition worth a warning was detected. +This option doesn't affect the verbosity level, thus both +.B \-\-quiet +and +.B \-\-no\-warn +have to be used to not display warnings and +to not alter the exit status. +.TP +.B \-\-robot +Print messages in a machine-parsable format. +This is intended to ease writing frontends that want to use +.B xz +instead of liblzma, which may be the case with various scripts. +The output with this option enabled is meant to be stable across +.B xz +releases. +See the section +.B "ROBOT MODE" +for details. +.TP +.BR \-\-info\-memory +Display, in human-readable format, how much physical memory (RAM) +.B xz +thinks the system has and the memory usage limits for compression +and decompression, and exit successfully. +.TP +.BR \-h ", " \-\-help +Display a help message describing the most commonly used options, +and exit successfully. +.TP +.BR \-H ", " \-\-long\-help +Display a help message describing all features of +.BR xz , +and exit successfully +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xz +and liblzma in human readable format. +To get machine-parsable output, specify +.B \-\-robot +before +.BR \-\-version . +. +.SH "ROBOT MODE" +The robot mode is activated with the +.B \-\-robot +option. +It makes the output of +.B xz +easier to parse by other programs. +Currently +.B \-\-robot +is supported only together with +.BR \-\-version , +.BR \-\-info\-memory , +and +.BR \-\-list . +It will be supported for compression and +decompression in the future. +. +.SS Version +.B "xz \-\-robot \-\-version" +will print the version number of +.B xz +and liblzma in the following format: +.PP +.BI XZ_VERSION= XYYYZZZS +.br +.BI LIBLZMA_VERSION= XYYYZZZS +.TP +.I X +Major version. +.TP +.I YYY +Minor version. +Even numbers are stable. +Odd numbers are alpha or beta versions. +.TP +.I ZZZ +Patch level for stable releases or +just a counter for development releases. +.TP +.I S +Stability. +0 is alpha, 1 is beta, and 2 is stable. +.I S +should be always 2 when +.I YYY +is even. +.PP +.I XYYYZZZS +are the same on both lines if +.B xz +and liblzma are from the same XZ Utils release. +.PP +Examples: 4.999.9beta is +.B 49990091 +and +5.0.0 is +.BR 50000002 . +. +.SS "Memory limit information" +.B "xz \-\-robot \-\-info\-memory" +prints a single line with three tab-separated columns: +.IP 1. 4 +Total amount of physical memory (RAM) in bytes +.IP 2. 4 +Memory usage limit for compression in bytes. +A special value of zero indicates the default setting, +which for single-threaded mode is the same as no limit. +.IP 3. 4 +Memory usage limit for decompression in bytes. +A special value of zero indicates the default setting, +which for single-threaded mode is the same as no limit. +.PP +In the future, the output of +.B "xz \-\-robot \-\-info\-memory" +may have more columns, but never more than a single line. +. +.SS "List mode" +.B "xz \-\-robot \-\-list" +uses tab-separated output. +The first column of every line has a string +that indicates the type of the information found on that line: +.TP +.B name +This is always the first line when starting to list a file. +The second column on the line is the filename. +.TP +.B file +This line contains overall information about the +.B .xz +file. +This line is always printed after the +.B name +line. +.TP +.B stream +This line type is used only when +.B \-\-verbose +was specified. +There are as many +.B stream +lines as there are streams in the +.B .xz +file. +.TP +.B block +This line type is used only when +.B \-\-verbose +was specified. +There are as many +.B block +lines as there are blocks in the +.B .xz +file. +The +.B block +lines are shown after all the +.B stream +lines; different line types are not interleaved. +.TP +.B summary +This line type is used only when +.B \-\-verbose +was specified twice. +This line is printed after all +.B block +lines. +Like the +.B file +line, the +.B summary +line contains overall information about the +.B .xz +file. +.TP +.B totals +This line is always the very last line of the list output. +It shows the total counts and sizes. +.PP +The columns of the +.B file +lines: +.PD 0 +.RS +.IP 2. 4 +Number of streams in the file +.IP 3. 4 +Total number of blocks in the stream(s) +.IP 4. 4 +Compressed size of the file +.IP 5. 4 +Uncompressed size of the file +.IP 6. 4 +Compression ratio, for example +.BR 0.123. +If ratio is over 9.999, three dashes +.RB ( \-\-\- ) +are displayed instead of the ratio. +.IP 7. 4 +Comma-separated list of integrity check names. +The following strings are used for the known check types: +.BR None , +.BR CRC32 , +.BR CRC64 , +and +.BR SHA\-256 . +For unknown check types, +.BI Unknown\- N +is used, where +.I N +is the Check ID as a decimal number (one or two digits). +.IP 8. 4 +Total size of stream padding in the file +.RE +.PD +.PP +The columns of the +.B stream +lines: +.PD 0 +.RS +.IP 2. 4 +Stream number (the first stream is 1) +.IP 3. 4 +Number of blocks in the stream +.IP 4. 4 +Compressed start offset +.IP 5. 4 +Uncompressed start offset +.IP 6. 4 +Compressed size (does not include stream padding) +.IP 7. 4 +Uncompressed size +.IP 8. 4 +Compression ratio +.IP 9. 4 +Name of the integrity check +.IP 10. 4 +Size of stream padding +.RE +.PD +.PP +The columns of the +.B block +lines: +.PD 0 +.RS +.IP 2. 4 +Number of the stream containing this block +.IP 3. 4 +Block number relative to the beginning of the stream +(the first block is 1) +.IP 4. 4 +Block number relative to the beginning of the file +.IP 5. 4 +Compressed start offset relative to the beginning of the file +.IP 6. 4 +Uncompressed start offset relative to the beginning of the file +.IP 7. 4 +Total compressed size of the block (includes headers) +.IP 8. 4 +Uncompressed size +.IP 9. 4 +Compression ratio +.IP 10. 4 +Name of the integrity check +.RE +.PD +.PP +If +.B \-\-verbose +was specified twice, additional columns are included on the +.B block +lines. +These are not displayed with a single +.BR \-\-verbose , +because getting this information requires many seeks +and can thus be slow: +.PD 0 +.RS +.IP 11. 4 +Value of the integrity check in hexadecimal +.IP 12. 4 +Block header size +.IP 13. 4 +Block flags: +.B c +indicates that compressed size is present, and +.B u +indicates that uncompressed size is present. +If the flag is not set, a dash +.RB ( \- ) +is shown instead to keep the string length fixed. +New flags may be added to the end of the string in the future. +.IP 14. 4 +Size of the actual compressed data in the block (this excludes +the block header, block padding, and check fields) +.IP 15. 4 +Amount of memory (in bytes) required to decompress +this block with this +.B xz +version +.IP 16. 4 +Filter chain. +Note that most of the options used at compression time +cannot be known, because only the options +that are needed for decompression are stored in the +.B .xz +headers. +.RE +.PD +.PP +The columns of the +.B summary +lines: +.PD 0 +.RS +.IP 2. 4 +Amount of memory (in bytes) required to decompress +this file with this +.B xz +version +.IP 3. 4 +.B yes +or +.B no +indicating if all block headers have both compressed size and +uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 4. 4 +Minimum +.B xz +version required to decompress the file +.RE +.PD +.PP +The columns of the +.B totals +line: +.PD 0 +.RS +.IP 2. 4 +Number of streams +.IP 3. 4 +Number of blocks +.IP 4. 4 +Compressed size +.IP 5. 4 +Uncompressed size +.IP 6. 4 +Average compression ratio +.IP 7. 4 +Comma-separated list of integrity check names +that were present in the files +.IP 8. 4 +Stream padding size +.IP 9. 4 +Number of files. +This is here to +keep the order of the earlier columns the same as on +.B file +lines. +.PD +.RE +.PP +If +.B \-\-verbose +was specified twice, additional columns are included on the +.B totals +line: +.PD 0 +.RS +.IP 10. 4 +Maximum amount of memory (in bytes) required to decompress +the files with this +.B xz +version +.IP 11. 4 +.B yes +or +.B no +indicating if all block headers have both compressed size and +uncompressed size stored in them +.PP +.I Since +.B xz +.I 5.1.2alpha: +.IP 12. 4 +Minimum +.B xz +version required to decompress the file +.RE +.PD +.PP +Future versions may add new line types and +new columns can be added to the existing line types, +but the existing columns won't be changed. +. +.SH "EXIT STATUS" +.TP +.B 0 +All is good. +.TP +.B 1 +An error occurred. +.TP +.B 2 +Something worth a warning occurred, +but no actual errors occurred. +.PP +Notices (not warnings or errors) printed on standard error +don't affect the exit status. +. +.SH ENVIRONMENT +.B xz +parses space-separated lists of options +from the environment variables +.B XZ_DEFAULTS +and +.BR XZ_OPT , +in this order, before parsing the options from the command line. +Note that only options are parsed from the environment variables; +all non-options are silently ignored. +Parsing is done with +.BR getopt_long (3) +which is used also for the command line arguments. +.TP +.B XZ_DEFAULTS +User-specific or system-wide default options. +Typically this is set in a shell initialization script to enable +.BR xz 's +memory usage limiter by default. +Excluding shell initialization scripts +and similar special cases, scripts must never set or unset +.BR XZ_DEFAULTS . +.TP +.B XZ_OPT +This is for passing options to +.B xz +when it is not possible to set the options directly on the +.B xz +command line. +This is the case e.g. when +.B xz +is run by a script or tool, e.g. GNU +.BR tar (1): +.RS +.RS +.PP +.nf +.ft CW +XZ_OPT=\-2v tar caf foo.tar.xz foo +.ft R +.fi +.RE +.RE +.IP "" +Scripts may use +.B XZ_OPT +e.g. to set script-specific default compression options. +It is still recommended to allow users to override +.B XZ_OPT +if that is reasonable, e.g. in +.BR sh (1) +scripts one may use something like this: +.RS +.RS +.PP +.nf +.ft CW +XZ_OPT=${XZ_OPT\-"\-7e"} +export XZ_OPT +.ft R +.fi +.RE +.RE +. +.SH "LZMA UTILS COMPATIBILITY" +The command line syntax of +.B xz +is practically a superset of +.BR lzma , +.BR unlzma , +and +.BR lzcat +as found from LZMA Utils 4.32.x. +In most cases, it is possible to replace +LZMA Utils with XZ Utils without breaking existing scripts. +There are some incompatibilities though, +which may sometimes cause problems. +. +.SS "Compression preset levels" +The numbering of the compression level presets is not identical in +.B xz +and LZMA Utils. +The most important difference is how dictionary sizes +are mapped to different presets. +Dictionary size is roughly equal to the decompressor memory usage. +.RS +.PP +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils +\-0;256 KiB;N/A +\-1;1 MiB;64 KiB +\-2;2 MiB;1 MiB +\-3;4 MiB;512 KiB +\-4;4 MiB;1 MiB +\-5;8 MiB;2 MiB +\-6;8 MiB;4 MiB +\-7;16 MiB;8 MiB +\-8;32 MiB;16 MiB +\-9;64 MiB;32 MiB +.TE +.RE +.PP +The dictionary size differences affect +the compressor memory usage too, +but there are some other differences between +LZMA Utils and XZ Utils, which +make the difference even bigger: +.RS +.PP +.TS +tab(;); +c c c +c n n. +Level;xz;LZMA Utils 4.32.x +\-0;3 MiB;N/A +\-1;9 MiB;2 MiB +\-2;17 MiB;12 MiB +\-3;32 MiB;12 MiB +\-4;48 MiB;16 MiB +\-5;94 MiB;26 MiB +\-6;94 MiB;45 MiB +\-7;186 MiB;83 MiB +\-8;370 MiB;159 MiB +\-9;674 MiB;311 MiB +.TE +.RE +.PP +The default preset level in LZMA Utils is +.B \-7 +while in XZ Utils it is +.BR \-6 , +so both use an 8 MiB dictionary by default. +. +.SS "Streamed vs. non-streamed .lzma files" +The uncompressed size of the file can be stored in the +.B .lzma +header. +LZMA Utils does that when compressing regular files. +The alternative is to mark that uncompressed size is unknown +and use end-of-payload marker to indicate +where the decompressor should stop. +LZMA Utils uses this method when uncompressed size isn't known, +which is the case for example in pipes. +.PP +.B xz +supports decompressing +.B .lzma +files with or without end-of-payload marker, but all +.B .lzma +files created by +.B xz +will use end-of-payload marker and have uncompressed size +marked as unknown in the +.B .lzma +header. +This may be a problem in some uncommon situations. +For example, a +.B .lzma +decompressor in an embedded device might work +only with files that have known uncompressed size. +If you hit this problem, you need to use LZMA Utils +or LZMA SDK to create +.B .lzma +files with known uncompressed size. +. +.SS "Unsupported .lzma files" +The +.B .lzma +format allows +.I lc +values up to 8, and +.I lp +values up to 4. +LZMA Utils can decompress files with any +.I lc +and +.IR lp , +but always creates files with +.B lc=3 +and +.BR lp=0 . +Creating files with other +.I lc +and +.I lp +is possible with +.B xz +and with LZMA SDK. +.PP +The implementation of the LZMA1 filter in liblzma +requires that the sum of +.I lc +and +.I lp +must not exceed 4. +Thus, +.B .lzma +files, which exceed this limitation, cannot be decompressed with +.BR xz . +.PP +LZMA Utils creates only +.B .lzma +files which have a dictionary size of +.RI "2^" n +(a power of 2) but accepts files with any dictionary size. +liblzma accepts only +.B .lzma +files which have a dictionary size of +.RI "2^" n +or +.RI "2^" n " + 2^(" n "\-1)." +This is to decrease false positives when detecting +.B .lzma +files. +.PP +These limitations shouldn't be a problem in practice, +since practically all +.B .lzma +files have been compressed with settings that liblzma will accept. +. +.SS "Trailing garbage" +When decompressing, +LZMA Utils silently ignore everything after the first +.B .lzma +stream. +In most situations, this is a bug. +This also means that LZMA Utils +don't support decompressing concatenated +.B .lzma +files. +.PP +If there is data left after the first +.B .lzma +stream, +.B xz +considers the file to be corrupt unless +.B \-\-single\-stream +was used. +This may break obscure scripts which have +assumed that trailing garbage is ignored. +. +.SH NOTES +. +.SS "Compressed output may vary" +The exact compressed output produced from +the same uncompressed input file +may vary between XZ Utils versions even if +compression options are identical. +This is because the encoder can be improved +(faster or better compression) +without affecting the file format. +The output can vary even between different +builds of the same XZ Utils version, +if different build options are used. +.PP +The above means that once +.B \-\-rsyncable +has been implemented, +the resulting files won't necessarily be rsyncable +unless both old and new files have been compressed +with the same xz version. +This problem can be fixed if a part of the encoder +implementation is frozen to keep rsyncable output +stable across xz versions. +. +.SS "Embedded .xz decompressors" +Embedded +.B .xz +decompressor implementations like XZ Embedded don't necessarily +support files created with integrity +.I check +types other than +.B none +and +.BR crc32 . +Since the default is +.BR \-\-check=crc64 , +you must use +.B \-\-check=none +or +.B \-\-check=crc32 +when creating files for embedded systems. +.PP +Outside embedded systems, all +.B .xz +format decompressors support all the +.I check +types, or at least are able to decompress +the file without verifying the +integrity check if the particular +.I check +is not supported. +.PP +XZ Embedded supports BCJ filters, +but only with the default start offset. +. +.SH EXAMPLES +. +.SS Basics +Compress the file +.I foo +into +.I foo.xz +using the default compression level +.RB ( \-6 ), +and remove +.I foo +if compression is successful: +.RS +.PP +.nf +.ft CW +xz foo +.ft R +.fi +.RE +.PP +Decompress +.I bar.xz +into +.I bar +and don't remove +.I bar.xz +even if decompression is successful: +.RS +.PP +.nf +.ft CW +xz \-dk bar.xz +.ft R +.fi +.RE +.PP +Create +.I baz.tar.xz +with the preset +.B \-4e +.RB ( "\-4 \-\-extreme" ), +which is slower than e.g. the default +.BR \-6 , +but needs less memory for compression and decompression (48\ MiB +and 5\ MiB, respectively): +.RS +.PP +.nf +.ft CW +tar cf \- baz | xz \-4e > baz.tar.xz +.ft R +.fi +.RE +.PP +A mix of compressed and uncompressed files can be decompressed +to standard output with a single command: +.RS +.PP +.nf +.ft CW +xz \-dcf a.txt b.txt.xz c.txt d.txt.lzma > abcd.txt +.ft R +.fi +.RE +. +.SS "Parallel compression of many files" +On GNU and *BSD, +.BR find (1) +and +.BR xargs (1) +can be used to parallelize compression of many files: +.RS +.PP +.nf +.ft CW +find . \-type f \e! \-name '*.xz' \-print0 \e + | xargs \-0r \-P4 \-n16 xz \-T1 +.ft R +.fi +.RE +.PP +The +.B \-P +option to +.BR xargs (1) +sets the number of parallel +.B xz +processes. +The best value for the +.B \-n +option depends on how many files there are to be compressed. +If there are only a couple of files, +the value should probably be 1; +with tens of thousands of files, +100 or even more may be appropriate to reduce the number of +.B xz +processes that +.BR xargs (1) +will eventually create. +.PP +The option +.B \-T1 +for +.B xz +is there to force it to single-threaded mode, because +.BR xargs (1) +is used to control the amount of parallelization. +. +.SS "Robot mode" +Calculate how many bytes have been saved in total +after compressing multiple files: +.RS +.PP +.nf +.ft CW +xz \-\-robot \-\-list *.xz | awk '/^totals/{print $5\-$4}' +.ft R +.fi +.RE +.PP +A script may want to know that it is using new enough +.BR xz . +The following +.BR sh (1) +script checks that the version number of the +.B xz +tool is at least 5.0.0. +This method is compatible with old beta versions, +which didn't support the +.B \-\-robot +option: +.RS +.PP +.nf +.ft CW +if ! eval "$(xz \-\-robot \-\-version 2> /dev/null)" || + [ "$XZ_VERSION" \-lt 50000002 ]; then + echo "Your xz is too old." +fi +unset XZ_VERSION LIBLZMA_VERSION +.ft R +.fi +.RE +.PP +Set a memory usage limit for decompression using +.BR XZ_OPT , +but if a limit has already been set, don't increase it: +.RS +.PP +.nf +.ft CW +NEWLIM=$((123 << 20)) # 123 MiB +OLDLIM=$(xz \-\-robot \-\-info\-memory | cut \-f3) +if [ $OLDLIM \-eq 0 \-o $OLDLIM \-gt $NEWLIM ]; then + XZ_OPT="$XZ_OPT \-\-memlimit\-decompress=$NEWLIM" + export XZ_OPT +fi +.ft R +.fi +.RE +. +.SS "Custom compressor filter chains" +The simplest use for custom filter chains is +customizing a LZMA2 preset. +This can be useful, +because the presets cover only a subset of the +potentially useful combinations of compression settings. +.PP +The CompCPU columns of the tables +from the descriptions of the options +.BR "\-0" " ... " "\-9" +and +.B \-\-extreme +are useful when customizing LZMA2 presets. +Here are the relevant parts collected from those two tables: +.RS +.PP +.TS +tab(;); +c c +n n. +Preset;CompCPU +\-0;0 +\-1;1 +\-2;2 +\-3;3 +\-4;4 +\-5;5 +\-6;6 +\-5e;7 +\-6e;8 +.TE +.RE +.PP +If you know that a file requires +somewhat big dictionary (e.g. 32 MiB) to compress well, +but you want to compress it quicker than +.B "xz \-8" +would do, a preset with a low CompCPU value (e.g. 1) +can be modified to use a bigger dictionary: +.RS +.PP +.nf +.ft CW +xz \-\-lzma2=preset=1,dict=32MiB foo.tar +.ft R +.fi +.RE +.PP +With certain files, the above command may be faster than +.B "xz \-6" +while compressing significantly better. +However, it must be emphasized that only some files benefit from +a big dictionary while keeping the CompCPU value low. +The most obvious situation, +where a big dictionary can help a lot, +is an archive containing very similar files +of at least a few megabytes each. +The dictionary size has to be significantly bigger +than any individual file to allow LZMA2 to take +full advantage of the similarities between consecutive files. +.PP +If very high compressor and decompressor memory usage is fine, +and the file being compressed is +at least several hundred megabytes, it may be useful +to use an even bigger dictionary than the 64 MiB that +.B "xz \-9" +would use: +.RS +.PP +.nf +.ft CW +xz \-vv \-\-lzma2=dict=192MiB big_foo.tar +.ft R +.fi +.RE +.PP +Using +.B \-vv +.RB ( "\-\-verbose \-\-verbose" ) +like in the above example can be useful +to see the memory requirements +of the compressor and decompressor. +Remember that using a dictionary bigger than +the size of the uncompressed file is waste of memory, +so the above command isn't useful for small files. +.PP +Sometimes the compression time doesn't matter, +but the decompressor memory usage has to be kept low +e.g. to make it possible to decompress the file on +an embedded system. +The following command uses +.B \-6e +.RB ( "\-6 \-\-extreme" ) +as a base and sets the dictionary to only 64\ KiB. +The resulting file can be decompressed with XZ Embedded +(that's why there is +.BR \-\-check=crc32 ) +using about 100\ KiB of memory. +.RS +.PP +.nf +.ft CW +xz \-\-check=crc32 \-\-lzma2=preset=6e,dict=64KiB foo +.ft R +.fi +.RE +.PP +If you want to squeeze out as many bytes as possible, +adjusting the number of literal context bits +.RI ( lc ) +and number of position bits +.RI ( pb ) +can sometimes help. +Adjusting the number of literal position bits +.RI ( lp ) +might help too, but usually +.I lc +and +.I pb +are more important. +E.g. a source code archive contains mostly US-ASCII text, +so something like the following might give +slightly (like 0.1\ %) smaller file than +.B "xz \-6e" +(try also without +.BR lc=4 ): +.RS +.PP +.nf +.ft CW +xz \-\-lzma2=preset=6e,pb=0,lc=4 source_code.tar +.ft R +.fi +.RE +.PP +Using another filter together with LZMA2 can improve +compression with certain file types. +E.g. to compress a x86-32 or x86-64 shared library +using the x86 BCJ filter: +.RS +.PP +.nf +.ft CW +xz \-\-x86 \-\-lzma2 libfoo.so +.ft R +.fi +.RE +.PP +Note that the order of the filter options is significant. +If +.B \-\-x86 +is specified after +.BR \-\-lzma2 , +.B xz +will give an error, +because there cannot be any filter after LZMA2, +and also because the x86 BCJ filter cannot be used +as the last filter in the chain. +.PP +The Delta filter together with LZMA2 +can give good results with bitmap images. +It should usually beat PNG, +which has a few more advanced filters than simple +delta but uses Deflate for the actual compression. +.PP +The image has to be saved in uncompressed format, +e.g. as uncompressed TIFF. +The distance parameter of the Delta filter is set +to match the number of bytes per pixel in the image. +E.g. 24-bit RGB bitmap needs +.BR dist=3 , +and it is also good to pass +.B pb=0 +to LZMA2 to accommodate the three-byte alignment: +.RS +.PP +.nf +.ft CW +xz \-\-delta=dist=3 \-\-lzma2=pb=0 foo.tiff +.ft R +.fi +.RE +.PP +If multiple images have been put into a single archive (e.g.\& +.BR .tar ), +the Delta filter will work on that too as long as all images +have the same number of bytes per pixel. +. +.SH "SEE ALSO" +.BR xzdec (1), +.BR xzdiff (1), +.BR xzgrep (1), +.BR xzless (1), +.BR xzmore (1), +.BR gzip (1), +.BR bzip2 (1), +.BR 7z (1) +.PP +XZ Utils: +.br +XZ Embedded: +.br +LZMA SDK: diff --git a/usr/share/man/man1/xzcat.1 b/usr/share/man/man1/xzcat.1 new file mode 120000 index 0000000..01b6808 --- /dev/null +++ b/usr/share/man/man1/xzcat.1 @@ -0,0 +1 @@ +xz.1 \ No newline at end of file diff --git a/usr/share/man/man1/xzcmp.1 b/usr/share/man/man1/xzcmp.1 new file mode 120000 index 0000000..e65fb97 --- /dev/null +++ b/usr/share/man/man1/xzcmp.1 @@ -0,0 +1 @@ +xzdiff.1 \ No newline at end of file diff --git a/usr/share/man/man1/xzdec.1 b/usr/share/man/man1/xzdec.1 new file mode 100644 index 0000000..1e5ced9 --- /dev/null +++ b/usr/share/man/man1/xzdec.1 @@ -0,0 +1,146 @@ +.\" +.\" Author: Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.TH XZDEC 1 "2013-06-30" "Tukaani" "XZ Utils" +.SH NAME +xzdec, lzmadec \- Small .xz and .lzma decompressors +.SH SYNOPSIS +.B xzdec +.RI [ option... ] +.RI [ file... ] +.br +.B lzmadec +.RI [ option... ] +.RI [ file... ] +.SH DESCRIPTION +.B xzdec +is a liblzma-based decompression-only tool for +.B .xz +(and only +.BR .xz ) +files. +.B xzdec +is intended to work as a drop-in replacement for +.BR xz (1) +in the most common situations where a script +has been written to use +.B "xz \-\-decompress \-\-stdout" +(and possibly a few other commonly used options) to decompress +.B .xz +files. +.B lzmadec +is identical to +.B xzdec +except that +.B lzmadec +supports +.B .lzma +files instead of +.B .xz +files. +.PP +To reduce the size of the executable, +.B xzdec +doesn't support multithreading or localization, +and doesn't read options from +.B XZ_DEFAULTS +and +.B XZ_OPT +environment variables. +.B xzdec +doesn't support displaying intermediate progress information: sending +.B SIGINFO +to +.B xzdec +does nothing, but sending +.B SIGUSR1 +terminates the process instead of displaying progress information. +.SH OPTIONS +.TP +.BR \-d ", " \-\-decompress ", " \-\-uncompress +Ignored for +.BR xz (1) +compatibility. +.B xzdec +supports only decompression. +.TP +.BR \-k ", " \-\-keep +Ignored for +.BR xz (1) +compatibility. +.B xzdec +never creates or removes any files. +.TP +.BR \-c ", " \-\-stdout ", " \-\-to-stdout +Ignored for +.BR xz (1) +compatibility. +.B xzdec +always writes the decompressed data to standard output. +.TP +.BR \-q ", " \-\-quiet +Specifying this once does nothing since +.B xzdec +never displays any warnings or notices. +Specify this twice to suppress errors. +.TP +.BR \-Q ", " \-\-no-warn +Ignored for +.BR xz (1) +compatibility. +.B xzdec +never uses the exit status 2. +.TP +.BR \-h ", " \-\-help +Display a help message and exit successfully. +.TP +.BR \-V ", " \-\-version +Display the version number of +.B xzdec +and liblzma. +.SH "EXIT STATUS" +.TP +.B 0 +All was good. +.TP +.B 1 +An error occurred. +.PP +.B xzdec +doesn't have any warning messages like +.BR xz (1) +has, thus the exit status 2 is not used by +.BR xzdec . +.SH NOTES +Use +.BR xz (1) +instead of +.B xzdec +or +.B lzmadec +for normal everyday use. +.B xzdec +or +.B lzmadec +are meant only for situations where it is important to have +a smaller decompressor than the full-featured +.BR xz (1). +.PP +.B xzdec +and +.B lzmadec +are not really that small. +The size can be reduced further by dropping +features from liblzma at compile time, +but that shouldn't usually be done for executables distributed +in typical non-embedded operating system distributions. +If you need a truly small +.B .xz +decompressor, consider using XZ Embedded. +.SH "SEE ALSO" +.BR xz (1) +.PP +XZ Embedded: diff --git a/usr/share/man/man1/xzdiff.1 b/usr/share/man/man1/xzdiff.1 new file mode 100644 index 0000000..b33670c --- /dev/null +++ b/usr/share/man/man1/xzdiff.1 @@ -0,0 +1,77 @@ +.\" +.\" Original zdiff.1 for gzip: Jean-loup Gailly +.\" +.\" Modifications for XZ Utils: Lasse Collin +.\" Andrew Dudman +.\" +.\" License: GNU GPLv2+ +.\" +.TH XZDIFF 1 "2011-03-19" "Tukaani" "XZ Utils" +.SH NAME +xzcmp, xzdiff, lzcmp, lzdiff \- compare compressed files +.SH SYNOPSIS +.B xzcmp +.RI [ cmp_options "] " file1 " [" file2 ] +.br +.B xzdiff +.RI [ diff_options "] " file1 " [" file2 ] +.br +.B lzcmp +.RI [ cmp_options "] " file1 " [" file2 ] +.br +.B lzdiff +.RI [ diff_options "] " file1 " [" file2 ] +.SH DESCRIPTION +.B xzcmp +and +.B xzdiff +invoke +.BR cmp (1) +or +.BR diff (1) +on files compressed with +.BR xz (1), +.BR lzma (1), +.BR gzip (1), +.BR bzip2 (1), +or +.BR lzop (1). +All options specified are passed directly to +.BR cmp (1) +or +.BR diff (1). +If only one file is specified, then the files compared are +.I file1 +(which must have a suffix of a supported compression format) and +.I file1 +from which the compression format suffix has been stripped. +If two files are specified, +then they are uncompressed if necessary and fed to +.BR cmp (1) +or +.BR diff (1). +The exit status from +.BR cmp (1) +or +.BR diff (1) +is preserved. +.PP +The names +.B lzcmp +and +.B lzdiff +are provided for backward compatibility with LZMA Utils. +.SH "SEE ALSO" +.BR cmp (1), +.BR diff (1), +.BR xz (1), +.BR gzip (1), +.BR bzip2 (1), +.BR lzop (1), +.BR zdiff (1) +.SH BUGS +Messages from the +.BR cmp (1) +or +.BR diff (1) +programs refer to temporary filenames instead of those specified. diff --git a/usr/share/man/man1/xzegrep.1 b/usr/share/man/man1/xzegrep.1 new file mode 120000 index 0000000..bebd8a4 --- /dev/null +++ b/usr/share/man/man1/xzegrep.1 @@ -0,0 +1 @@ +xzgrep.1 \ No newline at end of file diff --git a/usr/share/man/man1/xzfgrep.1 b/usr/share/man/man1/xzfgrep.1 new file mode 120000 index 0000000..bebd8a4 --- /dev/null +++ b/usr/share/man/man1/xzfgrep.1 @@ -0,0 +1 @@ +xzgrep.1 \ No newline at end of file diff --git a/usr/share/man/man1/xzgrep.1 b/usr/share/man/man1/xzgrep.1 new file mode 100644 index 0000000..4bddbe2 --- /dev/null +++ b/usr/share/man/man1/xzgrep.1 @@ -0,0 +1,98 @@ +.\" +.\" Original zgrep.1 for gzip: Jean-loup Gailly +.\" Charles Levert +.\" +.\" Modifications for XZ Utils: Lasse Collin +.\" +.\" License: GNU GPLv2+ +.\" +.TH XZGREP 1 "2011-03-19" "Tukaani" "XZ Utils" +.SH NAME +xzgrep \- search compressed files for a regular expression +.SH SYNOPSIS +.B xzgrep +.RI [ grep_options ] +.RB [ \-e ] +.I pattern +.IR file "..." +.br +.B xzegrep +.RB ... +.br +.B xzfgrep +.RB ... +.br +.B lzgrep +.RB ... +.br +.B lzegrep +.RB ... +.br +.B lzfgrep +.RB ... +.SH DESCRIPTION +.B xzgrep +invokes +.BR grep (1) +on +.I files +which may be either uncompressed or compressed with +.BR xz (1), +.BR lzma (1), +.BR gzip (1), +.BR bzip2 (1), +or +.BR lzop (1). +All options specified are passed directly to +.BR grep (1). +.PP +If no +.I file +is specified, then standard input is decompressed if necessary +and fed to +.BR grep (1). +When reading from standard input, +.BR gzip (1), +.BR bzip2 (1), +and +.BR lzop (1) +compressed files are not supported. +.PP +If +.B xzgrep +is invoked as +.B xzegrep +or +.B xzfgrep +then +.BR egrep (1) +or +.BR fgrep (1) +is used instead of +.BR grep (1). +The same applies to names +.BR lzgrep , +.BR lzegrep , +and +.BR lzfgrep , +which are provided for backward compatibility with LZMA Utils. +.PP +.SH ENVIRONMENT +.TP +.B GREP +If the +.B GREP +environment variable is set, +.B xzgrep +uses it instead of +.BR grep (1), +.BR egrep (1), +or +.BR fgrep (1). +.SH "SEE ALSO" +.BR grep (1), +.BR xz (1), +.BR gzip (1), +.BR bzip2 (1), +.BR lzop (1), +.BR zgrep (1) diff --git a/usr/share/man/man1/xzless.1 b/usr/share/man/man1/xzless.1 new file mode 100644 index 0000000..2d05459 --- /dev/null +++ b/usr/share/man/man1/xzless.1 @@ -0,0 +1,69 @@ +.\" +.\" Authors: Andrew Dudman +.\" Lasse Collin +.\" +.\" This file has been put into the public domain. +.\" You can do whatever you want with this file. +.\" +.\" (Note that this file is not based on gzip's zless.1.) +.\" +.TH XZLESS 1 "2010-09-27" "Tukaani" "XZ Utils" +.SH NAME +xzless, lzless \- view xz or lzma compressed (text) files +.SH SYNOPSIS +.B xzless +.RI [ file ...] +.br +.B lzless +.RI [ file ...] +.SH DESCRIPTION +.B xzless +is a filter that displays text from compressed files to a terminal. +It works on files compressed with +.BR xz (1) +or +.BR lzma (1). +If no +.I files +are given, +.B xzless +reads from standard input. +.PP +.B xzless +uses +.BR less (1) +to present its output. +Unlike +.BR xzmore , +its choice of pager cannot be altered by +setting an environment variable. +Commands are based on both +.BR more (1) +and +.BR vi (1) +and allow back and forth movement and searching. +See the +.BR less (1) +manual for more information. +.PP +The command named +.B lzless +is provided for backward compatibility with LZMA Utils. +.SH ENVIRONMENT +.TP +.B LESSMETACHARS +A list of characters special to the shell. +Set by +.B xzless +unless it is already set in the environment. +.TP +.B LESSOPEN +Set to a command line to invoke the +.BR xz (1) +decompressor for preprocessing the input files to +.BR less (1). +.SH "SEE ALSO" +.BR less (1), +.BR xz (1), +.BR xzmore (1), +.BR zless (1) diff --git a/usr/share/man/man1/xzmore.1 b/usr/share/man/man1/xzmore.1 new file mode 100644 index 0000000..9613974 --- /dev/null +++ b/usr/share/man/man1/xzmore.1 @@ -0,0 +1,55 @@ +.\" +.\" Original zdiff.1 for gzip: Jean-loup Gailly +.\" Modifications for XZ Utils: Lasse Collin +.\" +.\" License: GNU GPLv2+ +.\" +.TH XZMORE 1 "2013-06-30" "Tukaani" "XZ Utils" +.SH NAME +xzmore, lzmore \- view xz or lzma compressed (text) files +.SH SYNOPSIS +.B xzmore +.RI [ file... ] +.br +.B lzmore +.RI [ file... ] +.SH DESCRIPTION +.B xzmore +is a filter which allows examination of +.BR xz (1) +or +.BR lzma (1) +compressed text files one screenful at a time +on a soft-copy terminal. +.PP +To use a pager other than the default +.B more, +set environment variable +.B PAGER +to the name of the desired program. +The name +.B lzmore +is provided for backward compatibility with LZMA Utils. +.TP +.BR e " or " q +When the prompt \-\-More\-\-(Next file: +.IR file ) +is printed, this command causes +.B xzmore +to exit. +.TP +.B s +When the prompt \-\-More\-\-(Next file: +.IR file ) +is printed, this command causes +.B xzmore +to skip the next file and continue. +.PP +For list of keyboard commands supported while actually viewing the +content of a file, refer to manual of the pager you use, usually +.BR more (1). +.SH "SEE ALSO" +.BR more (1), +.BR xz (1), +.BR xzless (1), +.BR zmore (1)