mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-09 19:35:51 +00:00
Bug 1575293 - Update lz4 to version 1.9.2. r=froydnj,jfkthame
Differential Revision: https://phabricator.services.mozilla.com/D42682
This commit is contained in:
parent
4cbccb42f6
commit
090467f15d
@ -1,122 +1,121 @@
|
||||
LZ4 - Library Files
|
||||
LZ4 - Extremely fast compression
|
||||
================================
|
||||
|
||||
The `/lib` directory contains many files, but depending on project's objectives,
|
||||
not all of them are necessary.
|
||||
LZ4 is lossless compression algorithm,
|
||||
providing compression speed > 500 MB/s per core,
|
||||
scalable with multi-cores CPU.
|
||||
It features an extremely fast decoder,
|
||||
with speed in multiple GB/s per core,
|
||||
typically reaching RAM speed limits on multi-core systems.
|
||||
|
||||
#### Minimal LZ4 build
|
||||
Speed can be tuned dynamically, selecting an "acceleration" factor
|
||||
which trades compression ratio for faster speed.
|
||||
On the other end, a high compression derivative, LZ4_HC, is also provided,
|
||||
trading CPU time for improved compression ratio.
|
||||
All versions feature the same decompression speed.
|
||||
|
||||
The minimum required is **`lz4.c`** and **`lz4.h`**,
|
||||
which provides the fast compression and decompression algorithms.
|
||||
They generate and decode data using the [LZ4 block format].
|
||||
LZ4 is also compatible with [dictionary compression](https://github.com/facebook/zstd#the-case-for-small-data-compression),
|
||||
both at [API](https://github.com/lz4/lz4/blob/v1.8.3/lib/lz4frame.h#L481) and [CLI](https://github.com/lz4/lz4/blob/v1.8.3/programs/lz4.1.md#operation-modifiers) levels.
|
||||
It can ingest any input file as dictionary, though only the final 64KB are used.
|
||||
This capability can be combined with the [Zstandard Dictionary Builder](https://github.com/facebook/zstd/blob/v1.3.5/programs/zstd.1.md#dictionary-builder),
|
||||
in order to drastically improve compression performance on small files.
|
||||
|
||||
|
||||
#### High Compression variant
|
||||
|
||||
For more compression ratio at the cost of compression speed,
|
||||
the High Compression variant called **lz4hc** is available.
|
||||
Add files **`lz4hc.c`** and **`lz4hc.h`**.
|
||||
This variant also compresses data using the [LZ4 block format],
|
||||
and depends on regular `lib/lz4.*` source files.
|
||||
LZ4 library is provided as open-source software using BSD 2-Clause license.
|
||||
|
||||
|
||||
#### Frame support, for interoperability
|
||||
|Branch |Status |
|
||||
|------------|---------|
|
||||
|master | [![Build Status][travisMasterBadge]][travisLink] [![Build status][AppveyorMasterBadge]][AppveyorLink] [![coverity][coverBadge]][coverlink] |
|
||||
|dev | [![Build Status][travisDevBadge]][travisLink] [![Build status][AppveyorDevBadge]][AppveyorLink] |
|
||||
|
||||
In order to produce compressed data compatible with `lz4` command line utility,
|
||||
it's necessary to use the [official interoperable frame format].
|
||||
This format is generated and decoded automatically by the **lz4frame** library.
|
||||
Its public API is described in `lib/lz4frame.h`.
|
||||
In order to work properly, lz4frame needs all other modules present in `/lib`,
|
||||
including, lz4 and lz4hc, and also **xxhash**.
|
||||
So it's necessary to include all `*.c` and `*.h` files present in `/lib`.
|
||||
[travisMasterBadge]: https://travis-ci.org/lz4/lz4.svg?branch=master "Continuous Integration test suite"
|
||||
[travisDevBadge]: https://travis-ci.org/lz4/lz4.svg?branch=dev "Continuous Integration test suite"
|
||||
[travisLink]: https://travis-ci.org/lz4/lz4
|
||||
[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=master&svg=true "Windows test suite"
|
||||
[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=dev&svg=true "Windows test suite"
|
||||
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/lz4-1lndh
|
||||
[coverBadge]: https://scan.coverity.com/projects/4735/badge.svg "Static code analysis of Master branch"
|
||||
[coverlink]: https://scan.coverity.com/projects/4735
|
||||
|
||||
> **Branch Policy:**
|
||||
> - The "master" branch is considered stable, at all times.
|
||||
> - The "dev" branch is the one where all contributions must be merged
|
||||
before being promoted to master.
|
||||
> + If you plan to propose a patch, please commit into the "dev" branch,
|
||||
or its own feature branch.
|
||||
Direct commit to "master" are not permitted.
|
||||
|
||||
Benchmarks
|
||||
-------------------------
|
||||
|
||||
The benchmark uses [lzbench], from @inikep
|
||||
compiled with GCC v8.2.0 on Linux 64-bits (Ubuntu 4.18.0-17).
|
||||
The reference system uses a Core i7-9700K CPU @ 4.9GHz (w/ turbo boost).
|
||||
Benchmark evaluates the compression of reference [Silesia Corpus]
|
||||
in single-thread mode.
|
||||
|
||||
[lzbench]: https://github.com/inikep/lzbench
|
||||
[Silesia Corpus]: http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
|
||||
|
||||
| Compressor | Ratio | Compression | Decompression |
|
||||
| ---------- | ----- | ----------- | ------------- |
|
||||
| memcpy | 1.000 | 13700 MB/s | 13700 MB/s |
|
||||
|**LZ4 default (v1.9.0)** |**2.101**| **780 MB/s**| **4970 MB/s** |
|
||||
| LZO 2.09 | 2.108 | 670 MB/s | 860 MB/s |
|
||||
| QuickLZ 1.5.0 | 2.238 | 575 MB/s | 780 MB/s |
|
||||
| Snappy 1.1.4 | 2.091 | 565 MB/s | 1950 MB/s |
|
||||
| [Zstandard] 1.4.0 -1 | 2.883 | 515 MB/s | 1380 MB/s |
|
||||
| LZF v3.6 | 2.073 | 415 MB/s | 910 MB/s |
|
||||
| [zlib] deflate 1.2.11 -1| 2.730 | 100 MB/s | 415 MB/s |
|
||||
|**LZ4 HC -9 (v1.9.0)** |**2.721**| 41 MB/s | **4900 MB/s** |
|
||||
| [zlib] deflate 1.2.11 -6| 3.099 | 36 MB/s | 445 MB/s |
|
||||
|
||||
[zlib]: http://www.zlib.net/
|
||||
[Zstandard]: http://www.zstd.net/
|
||||
|
||||
LZ4 is also compatible and optimized for x32 mode,
|
||||
for which it provides additional speed performance.
|
||||
|
||||
|
||||
#### Advanced / Experimental API
|
||||
Installation
|
||||
-------------------------
|
||||
|
||||
Definitions which are not guaranteed to remain stable in future versions,
|
||||
are protected behind macros, such as `LZ4_STATIC_LINKING_ONLY`.
|
||||
As the name implies, these definitions can only be invoked
|
||||
in the context of static linking ***only***.
|
||||
Otherwise, dependent application may fail on API or ABI break in the future.
|
||||
The associated symbols are also not present in dynamic library by default.
|
||||
Should they be nonetheless needed, it's possible to force their publication
|
||||
by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`.
|
||||
|
||||
|
||||
#### Build macros
|
||||
|
||||
The following build macro can be selected at compilation time :
|
||||
|
||||
- `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop.
|
||||
This loops works great on x86/x64 cpus, and is automatically enabled on this platform.
|
||||
It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
|
||||
For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
|
||||
and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
|
||||
|
||||
- `LZ4_DISTANCE_MAX` : control the maximum offset that the compressor will allow.
|
||||
Set to 65535 by default, which is the maximum value supported by lz4 format.
|
||||
Reducing maximum distance will reduce opportunities for LZ4 to find matches,
|
||||
hence will produce worse the compression ratio.
|
||||
However, a smaller max distance may allow compatibility with specific decoders using limited memory budget.
|
||||
This build macro only influences the compressed output of the compressor.
|
||||
|
||||
- `LZ4_DISABLE_DEPRECATE_WARNINGS` : invoking a deprecated function will make the compiler generate a warning.
|
||||
This is meant to invite users to update their source code.
|
||||
Should this be a problem, it's generally possible to make the compiler ignore these warnings,
|
||||
for example with `-Wno-deprecated-declarations` on `gcc`,
|
||||
or `_CRT_SECURE_NO_WARNINGS` for Visual Studio.
|
||||
Another method is to define `LZ4_DISABLE_DEPRECATE_WARNINGS`
|
||||
before including the LZ4 header files.
|
||||
|
||||
|
||||
#### Amalgamation
|
||||
|
||||
lz4 source code can be amalgamated into a single file.
|
||||
One can combine all source code into `lz4_all.c` by using following command:
|
||||
```
|
||||
cat lz4.c > lz4_all.c
|
||||
cat lz4hc.c >> lz4_all.c
|
||||
cat lz4frame.c >> lz4_all.c
|
||||
make
|
||||
make install # this command may require root permissions
|
||||
```
|
||||
(`cat` file order is important) then compile `lz4_all.c`.
|
||||
All `*.h` files present in `/lib` remain necessary to compile `lz4_all.c`.
|
||||
|
||||
LZ4's `Makefile` supports standard [Makefile conventions],
|
||||
including [staged installs], [redirection], or [command redefinition].
|
||||
It is compatible with parallel builds (`-j#`).
|
||||
|
||||
[Makefile conventions]: https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html
|
||||
[staged installs]: https://www.gnu.org/prep/standards/html_node/DESTDIR.html
|
||||
[redirection]: https://www.gnu.org/prep/standards/html_node/Directory-Variables.html
|
||||
[command redefinition]: https://www.gnu.org/prep/standards/html_node/Utilities-in-Makefiles.html
|
||||
|
||||
|
||||
#### Windows : using MinGW+MSYS to create DLL
|
||||
Documentation
|
||||
-------------------------
|
||||
|
||||
DLL can be created using MinGW+MSYS with the `make liblz4` command.
|
||||
This command creates `dll\liblz4.dll` and the import library `dll\liblz4.lib`.
|
||||
To override the `dlltool` command when cross-compiling on Linux, just set the `DLLTOOL` variable. Example of cross compilation on Linux with mingw-w64 64 bits:
|
||||
```
|
||||
make BUILD_STATIC=no CC=x86_64-w64-mingw32-gcc DLLTOOL=x86_64-w64-mingw32-dlltool OS=Windows_NT
|
||||
```
|
||||
The import library is only required with Visual C++.
|
||||
The header files `lz4.h`, `lz4hc.h`, `lz4frame.h` and the dynamic library
|
||||
`dll\liblz4.dll` are required to compile a project using gcc/MinGW.
|
||||
The dynamic library has to be added to linking options.
|
||||
It means that if a project that uses LZ4 consists of a single `test-dll.c`
|
||||
file it should be linked with `dll\liblz4.dll`. For example:
|
||||
```
|
||||
$(CC) $(CFLAGS) -Iinclude/ test-dll.c -o test-dll dll\liblz4.dll
|
||||
```
|
||||
The compiled executable will require LZ4 DLL which is available at `dll\liblz4.dll`.
|
||||
The raw LZ4 block compression format is detailed within [lz4_Block_format].
|
||||
|
||||
Arbitrarily long files or data streams are compressed using multiple blocks,
|
||||
for streaming requirements. These blocks are organized into a frame,
|
||||
defined into [lz4_Frame_format].
|
||||
Interoperable versions of LZ4 must also respect the frame format.
|
||||
|
||||
[lz4_Block_format]: doc/lz4_Block_format.md
|
||||
[lz4_Frame_format]: doc/lz4_Frame_format.md
|
||||
|
||||
|
||||
#### Miscellaneous
|
||||
Other source versions
|
||||
-------------------------
|
||||
|
||||
Other files present in the directory are not source code. There are :
|
||||
Beyond the C reference source,
|
||||
many contributors have created versions of lz4 in multiple languages
|
||||
(Java, C#, Python, Perl, Ruby, etc.).
|
||||
A list of known source ports is maintained on the [LZ4 Homepage].
|
||||
|
||||
- `LICENSE` : contains the BSD license text
|
||||
- `Makefile` : `make` script to compile and install lz4 library (static and dynamic)
|
||||
- `liblz4.pc.in` : for `pkg-config` (used in `make install`)
|
||||
- `README.md` : this file
|
||||
|
||||
[official interoperable frame format]: ../doc/lz4_Frame_format.md
|
||||
[LZ4 block format]: ../doc/lz4_Block_format.md
|
||||
|
||||
|
||||
#### License
|
||||
|
||||
All source material within __lib__ directory are BSD 2-Clause licensed.
|
||||
See [LICENSE](LICENSE) for details.
|
||||
The license is also reminded at the top of each source file.
|
||||
[LZ4 Homepage]: http://www.lz4.org
|
||||
|
432
mfbt/lz4/lz4.c
432
mfbt/lz4/lz4.c
@ -106,6 +106,7 @@
|
||||
#define LZ4_DISABLE_DEPRECATE_WARNINGS /* due to LZ4_decompress_safe_withPrefix64k */
|
||||
#endif
|
||||
|
||||
#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */
|
||||
#include "lz4.h"
|
||||
/* see also "memory routines" below */
|
||||
|
||||
@ -182,6 +183,60 @@
|
||||
#define MEM_INIT(p,v,s) memset((p),(v),(s))
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Common Constants
|
||||
**************************************/
|
||||
#define MINMATCH 4
|
||||
|
||||
#define WILDCOPYLENGTH 8
|
||||
#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
|
||||
#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
|
||||
#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
|
||||
#define FASTLOOP_SAFE_DISTANCE 64
|
||||
static const int LZ4_minLength = (MFLIMIT+1);
|
||||
|
||||
#define KB *(1 <<10)
|
||||
#define MB *(1 <<20)
|
||||
#define GB *(1U<<30)
|
||||
|
||||
#define LZ4_DISTANCE_ABSOLUTE_MAX 65535
|
||||
#if (LZ4_DISTANCE_MAX > LZ4_DISTANCE_ABSOLUTE_MAX) /* max supported by LZ4 format */
|
||||
# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
|
||||
#endif
|
||||
|
||||
#define ML_BITS 4
|
||||
#define ML_MASK ((1U<<ML_BITS)-1)
|
||||
#define RUN_BITS (8-ML_BITS)
|
||||
#define RUN_MASK ((1U<<RUN_BITS)-1)
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Error detection
|
||||
**************************************/
|
||||
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
|
||||
# include <assert.h>
|
||||
#else
|
||||
# ifndef assert
|
||||
# define assert(condition) ((void)0)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
|
||||
|
||||
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
|
||||
# include <stdio.h>
|
||||
static int g_debuglog_enable = 1;
|
||||
# define DEBUGLOG(l, ...) { \
|
||||
if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
|
||||
fprintf(stderr, __FILE__ ": "); \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
fprintf(stderr, " \n"); \
|
||||
} }
|
||||
#else
|
||||
# define DEBUGLOG(l, ...) {} /* disabled */
|
||||
#endif
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Types
|
||||
**************************************/
|
||||
@ -317,6 +372,11 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
|
||||
#ifndef LZ4_FAST_DEC_LOOP
|
||||
# if defined(__i386__) || defined(__x86_64__)
|
||||
# define LZ4_FAST_DEC_LOOP 1
|
||||
# elif defined(__aarch64__) && !defined(__clang__)
|
||||
/* On aarch64, we disable this optimization for clang because on certain
|
||||
* mobile chipsets and clang, it reduces performance. For more information
|
||||
* refer to https://github.com/lz4/lz4/pull/707. */
|
||||
# define LZ4_FAST_DEC_LOOP 1
|
||||
# else
|
||||
# define LZ4_FAST_DEC_LOOP 0
|
||||
# endif
|
||||
@ -358,29 +418,35 @@ LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
|
||||
do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
|
||||
}
|
||||
|
||||
/* LZ4_memcpy_using_offset() presumes :
|
||||
* - dstEnd >= dstPtr + MINMATCH
|
||||
* - there is at least 8 bytes available to write after dstEnd */
|
||||
LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
|
||||
LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
|
||||
{
|
||||
BYTE v[8];
|
||||
|
||||
assert(dstEnd >= dstPtr + MINMATCH);
|
||||
LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
|
||||
|
||||
switch(offset) {
|
||||
case 1:
|
||||
memset(v, *srcPtr, 8);
|
||||
goto copy_loop;
|
||||
break;
|
||||
case 2:
|
||||
memcpy(v, srcPtr, 2);
|
||||
memcpy(&v[2], srcPtr, 2);
|
||||
memcpy(&v[4], &v[0], 4);
|
||||
goto copy_loop;
|
||||
break;
|
||||
case 4:
|
||||
memcpy(v, srcPtr, 4);
|
||||
memcpy(&v[4], srcPtr, 4);
|
||||
goto copy_loop;
|
||||
break;
|
||||
default:
|
||||
LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
|
||||
return;
|
||||
}
|
||||
|
||||
copy_loop:
|
||||
memcpy(dstPtr, v, 8);
|
||||
dstPtr += 8;
|
||||
while (dstPtr < dstEnd) {
|
||||
@ -391,63 +457,6 @@ LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const si
|
||||
#endif
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Common Constants
|
||||
**************************************/
|
||||
#define MINMATCH 4
|
||||
|
||||
#define WILDCOPYLENGTH 8
|
||||
#define LASTLITERALS 5 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
|
||||
#define MFLIMIT 12 /* see ../doc/lz4_Block_format.md#parsing-restrictions */
|
||||
#define MATCH_SAFEGUARD_DISTANCE ((2*WILDCOPYLENGTH) - MINMATCH) /* ensure it's possible to write 2 x wildcopyLength without overflowing output buffer */
|
||||
#define FASTLOOP_SAFE_DISTANCE 64
|
||||
static const int LZ4_minLength = (MFLIMIT+1);
|
||||
|
||||
#define KB *(1 <<10)
|
||||
#define MB *(1 <<20)
|
||||
#define GB *(1U<<30)
|
||||
|
||||
#ifndef LZ4_DISTANCE_MAX /* can be user - defined at compile time */
|
||||
# define LZ4_DISTANCE_MAX 65535
|
||||
#endif
|
||||
|
||||
#if (LZ4_DISTANCE_MAX > 65535) /* max supported by LZ4 format */
|
||||
# error "LZ4_DISTANCE_MAX is too big : must be <= 65535"
|
||||
#endif
|
||||
|
||||
#define ML_BITS 4
|
||||
#define ML_MASK ((1U<<ML_BITS)-1)
|
||||
#define RUN_BITS (8-ML_BITS)
|
||||
#define RUN_MASK ((1U<<RUN_BITS)-1)
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Error detection
|
||||
**************************************/
|
||||
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=1)
|
||||
# include <assert.h>
|
||||
#else
|
||||
# ifndef assert
|
||||
# define assert(condition) ((void)0)
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#define LZ4_STATIC_ASSERT(c) { enum { LZ4_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */
|
||||
|
||||
#if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
|
||||
# include <stdio.h>
|
||||
static int g_debuglog_enable = 1;
|
||||
# define DEBUGLOG(l, ...) { \
|
||||
if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
|
||||
fprintf(stderr, __FILE__ ": "); \
|
||||
fprintf(stderr, __VA_ARGS__); \
|
||||
fprintf(stderr, " \n"); \
|
||||
} }
|
||||
#else
|
||||
# define DEBUGLOG(l, ...) {} /* disabled */
|
||||
#endif
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Common functions
|
||||
**************************************/
|
||||
@ -460,7 +469,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
|
||||
_BitScanForward64( &r, (U64)val );
|
||||
return (int)(r>>3);
|
||||
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_ctzll((U64)val) >> 3);
|
||||
return (unsigned)__builtin_ctzll((U64)val) >> 3;
|
||||
# else
|
||||
static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
|
||||
0, 3, 1, 3, 1, 4, 2, 7,
|
||||
@ -478,7 +487,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
|
||||
_BitScanForward( &r, (U32)val );
|
||||
return (int)(r>>3);
|
||||
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_ctz((U32)val) >> 3);
|
||||
return (unsigned)__builtin_ctz((U32)val) >> 3;
|
||||
# else
|
||||
static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
|
||||
3, 2, 2, 1, 3, 2, 0, 1,
|
||||
@ -494,7 +503,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
|
||||
_BitScanReverse64( &r, val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_clzll((U64)val) >> 3);
|
||||
return (unsigned)__builtin_clzll((U64)val) >> 3;
|
||||
# else
|
||||
static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
|
||||
Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
|
||||
@ -511,7 +520,7 @@ static unsigned LZ4_NbCommonBytes (reg_t val)
|
||||
_BitScanReverse( &r, (unsigned long)val );
|
||||
return (unsigned)(r>>3);
|
||||
# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
|
||||
return (__builtin_clz((U32)val) >> 3);
|
||||
return (unsigned)__builtin_clz((U32)val) >> 3;
|
||||
# else
|
||||
unsigned r;
|
||||
if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
|
||||
@ -606,9 +615,11 @@ int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize);
|
||||
int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_dict, const char* source, char* dest, int srcSize);
|
||||
|
||||
int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize);
|
||||
int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
|
||||
int compressedSize, int maxOutputSize,
|
||||
const void* dictStart, size_t dictSize);
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
@ -643,6 +654,18 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
|
||||
return LZ4_hash4(LZ4_read32(p), tableType);
|
||||
}
|
||||
|
||||
static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
|
||||
{
|
||||
switch (tableType)
|
||||
{
|
||||
default: /* fallthrough */
|
||||
case clearedTable: { /* illegal! */ assert(0); return; }
|
||||
case byPtr: { const BYTE** hashTable = (const BYTE**)tableBase; hashTable[h] = NULL; return; }
|
||||
case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = 0; return; }
|
||||
case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = 0; return; }
|
||||
}
|
||||
}
|
||||
|
||||
static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
|
||||
{
|
||||
switch (tableType)
|
||||
@ -703,18 +726,19 @@ static const BYTE* LZ4_getPositionOnHash(U32 h, const void* tableBase, tableType
|
||||
{ const U16* const hashTable = (const U16*) tableBase; return hashTable[h] + srcBase; } /* default, to ensure a return */
|
||||
}
|
||||
|
||||
LZ4_FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p,
|
||||
const void* tableBase, tableType_t tableType,
|
||||
const BYTE* srcBase)
|
||||
LZ4_FORCE_INLINE const BYTE*
|
||||
LZ4_getPosition(const BYTE* p,
|
||||
const void* tableBase, tableType_t tableType,
|
||||
const BYTE* srcBase)
|
||||
{
|
||||
U32 const h = LZ4_hashPosition(p, tableType);
|
||||
return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase);
|
||||
}
|
||||
|
||||
LZ4_FORCE_INLINE void LZ4_prepareTable(
|
||||
LZ4_stream_t_internal* const cctx,
|
||||
const int inputSize,
|
||||
const tableType_t tableType) {
|
||||
LZ4_FORCE_INLINE void
|
||||
LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
|
||||
const int inputSize,
|
||||
const tableType_t tableType) {
|
||||
/* If compression failed during the previous step, then the context
|
||||
* is marked as dirty, therefore, it has to be fully reset.
|
||||
*/
|
||||
@ -729,9 +753,10 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(
|
||||
* out if it's safe to leave as is or whether it needs to be reset.
|
||||
*/
|
||||
if (cctx->tableType != clearedTable) {
|
||||
assert(inputSize >= 0);
|
||||
if (cctx->tableType != tableType
|
||||
|| (tableType == byU16 && cctx->currentOffset + inputSize >= 0xFFFFU)
|
||||
|| (tableType == byU32 && cctx->currentOffset > 1 GB)
|
||||
|| ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
|
||||
|| ((tableType == byU32) && cctx->currentOffset > 1 GB)
|
||||
|| tableType == byPtr
|
||||
|| inputSize >= 4 KB)
|
||||
{
|
||||
@ -811,9 +836,9 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
|
||||
DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
|
||||
/* If init conditions are not met, we don't have to mark stream
|
||||
* as having dirty context, since no action was taken yet */
|
||||
if (outputDirective == fillOutput && maxOutputSize < 1) return 0; /* Impossible to store anything */
|
||||
if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported inputSize, too large (or negative) */
|
||||
if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) return 0; /* Size too large (not within 64K limit) */
|
||||
if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
|
||||
if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported inputSize, too large (or negative) */
|
||||
if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
|
||||
if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
|
||||
assert(acceleration >= 1);
|
||||
|
||||
@ -841,6 +866,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
|
||||
for ( ; ; ) {
|
||||
const BYTE* match;
|
||||
BYTE* token;
|
||||
const BYTE* filledIp;
|
||||
|
||||
/* Find a match */
|
||||
if (tableType == byPtr) {
|
||||
@ -909,10 +935,14 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
|
||||
forwardH = LZ4_hashPosition(forwardIp, tableType);
|
||||
LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
|
||||
|
||||
if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) continue; /* match outside of valid area */
|
||||
DEBUGLOG(7, "candidate at pos=%u (offset=%u \n", matchIndex, current - matchIndex);
|
||||
if ((dictIssue == dictSmall) && (matchIndex < prefixIdxLimit)) { continue; } /* match outside of valid area */
|
||||
assert(matchIndex < current);
|
||||
if ((tableType != byU16) && (matchIndex+LZ4_DISTANCE_MAX < current)) continue; /* too far */
|
||||
if (tableType == byU16) assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* too_far presumed impossible with byU16 */
|
||||
if ( ((tableType != byU16) || (LZ4_DISTANCE_MAX < LZ4_DISTANCE_ABSOLUTE_MAX))
|
||||
&& (matchIndex+LZ4_DISTANCE_MAX < current)) {
|
||||
continue;
|
||||
} /* too far */
|
||||
assert((current - matchIndex) <= LZ4_DISTANCE_MAX); /* match now expected within distance */
|
||||
|
||||
if (LZ4_read32(match) == LZ4_read32(ip)) {
|
||||
if (maybe_extMem) offset = current - matchIndex;
|
||||
@ -923,15 +953,16 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
|
||||
}
|
||||
|
||||
/* Catch up */
|
||||
filledIp = ip;
|
||||
while (((ip>anchor) & (match > lowLimit)) && (unlikely(ip[-1]==match[-1]))) { ip--; match--; }
|
||||
|
||||
/* Encode Literals */
|
||||
{ unsigned const litLength = (unsigned)(ip - anchor);
|
||||
token = op++;
|
||||
if ((outputDirective == limitedOutput) && /* Check output buffer overflow */
|
||||
(unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) )
|
||||
(unlikely(op + litLength + (2 + 1 + LASTLITERALS) + (litLength/255) > olimit)) ) {
|
||||
return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
|
||||
|
||||
}
|
||||
if ((outputDirective == fillOutput) &&
|
||||
(unlikely(op + (litLength+240)/255 /* litlen */ + litLength /* literals */ + 2 /* offset */ + 1 /* token */ + MFLIMIT - MINMATCH /* min last literals so last match is <= end - MFLIMIT */ > olimit))) {
|
||||
op--;
|
||||
@ -1002,12 +1033,26 @@ _next_match:
|
||||
}
|
||||
|
||||
if ((outputDirective) && /* Check output buffer overflow */
|
||||
(unlikely(op + (1 + LASTLITERALS) + (matchCode>>8) > olimit)) ) {
|
||||
(unlikely(op + (1 + LASTLITERALS) + (matchCode+240)/255 > olimit)) ) {
|
||||
if (outputDirective == fillOutput) {
|
||||
/* Match description too long : reduce it */
|
||||
U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 2 - 1 - LASTLITERALS) * 255;
|
||||
U32 newMatchCode = 15 /* in token */ - 1 /* to avoid needing a zero byte */ + ((U32)(olimit - op) - 1 - LASTLITERALS) * 255;
|
||||
ip -= matchCode - newMatchCode;
|
||||
assert(newMatchCode < matchCode);
|
||||
matchCode = newMatchCode;
|
||||
if (unlikely(ip <= filledIp)) {
|
||||
/* We have already filled up to filledIp so if ip ends up less than filledIp
|
||||
* we have positions in the hash table beyond the current position. This is
|
||||
* a problem if we reuse the hash table. So we have to remove these positions
|
||||
* from the hash table.
|
||||
*/
|
||||
const BYTE* ptr;
|
||||
DEBUGLOG(5, "Clearing %u positions", (U32)(filledIp - ip));
|
||||
for (ptr = ip; ptr <= filledIp; ++ptr) {
|
||||
U32 const h = LZ4_hashPosition(ptr, tableType);
|
||||
LZ4_clearHash(h, cctx->hashTable, tableType);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert(outputDirective == limitedOutput);
|
||||
return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
|
||||
@ -1027,6 +1072,8 @@ _next_match:
|
||||
} else
|
||||
*token += (BYTE)(matchCode);
|
||||
}
|
||||
/* Ensure we have enough space for the last literals. */
|
||||
assert(!(outputDirective == fillOutput && op + 1 + LASTLITERALS > olimit));
|
||||
|
||||
anchor = ip;
|
||||
|
||||
@ -1076,7 +1123,7 @@ _next_match:
|
||||
LZ4_putIndexOnHash(current, h, cctx->hashTable, tableType);
|
||||
assert(matchIndex < current);
|
||||
if ( ((dictIssue==dictSmall) ? (matchIndex >= prefixIdxLimit) : 1)
|
||||
&& ((tableType==byU16) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
|
||||
&& (((tableType==byU16) && (LZ4_DISTANCE_MAX == LZ4_DISTANCE_ABSOLUTE_MAX)) ? 1 : (matchIndex+LZ4_DISTANCE_MAX >= current))
|
||||
&& (LZ4_read32(match) == LZ4_read32(ip)) ) {
|
||||
token=op++;
|
||||
*token=0;
|
||||
@ -1143,7 +1190,7 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int
|
||||
return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, tableType, noDict, noDictIssue, acceleration);
|
||||
}
|
||||
} else {
|
||||
if (inputSize < LZ4_64Klimit) {;
|
||||
if (inputSize < LZ4_64Klimit) {
|
||||
return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, byU16, noDict, noDictIssue, acceleration);
|
||||
} else {
|
||||
const tableType_t tableType = ((sizeof(void*)==4) && ((uptrval)source > LZ4_DISTANCE_MAX)) ? byPtr : byU32;
|
||||
@ -1306,12 +1353,12 @@ static size_t LZ4_stream_t_alignment(void)
|
||||
LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
|
||||
{
|
||||
DEBUGLOG(5, "LZ4_initStream");
|
||||
if (buffer == NULL) return NULL;
|
||||
if (size < sizeof(LZ4_stream_t)) return NULL;
|
||||
if (buffer == NULL) { return NULL; }
|
||||
if (size < sizeof(LZ4_stream_t)) { return NULL; }
|
||||
#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
|
||||
it reports an aligment of 8-bytes,
|
||||
while actually aligning LZ4_stream_t on 4 bytes. */
|
||||
if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) return NULL; /* alignment check */
|
||||
if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */
|
||||
#endif
|
||||
MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
|
||||
return (LZ4_stream_t*)buffer;
|
||||
@ -1361,18 +1408,18 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
|
||||
* there are only valid offsets in the window, which allows an optimization
|
||||
* in LZ4_compress_fast_continue() where it uses noDictIssue even when the
|
||||
* dictionary isn't a full 64k. */
|
||||
|
||||
if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
|
||||
base = dictEnd - 64 KB - dict->currentOffset;
|
||||
dict->dictionary = p;
|
||||
dict->dictSize = (U32)(dictEnd - p);
|
||||
dict->currentOffset += 64 KB;
|
||||
dict->tableType = tableType;
|
||||
|
||||
if (dictSize < (int)HASH_UNIT) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((dictEnd - p) > 64 KB) p = dictEnd - 64 KB;
|
||||
base = dictEnd - dict->currentOffset;
|
||||
dict->dictionary = p;
|
||||
dict->dictSize = (U32)(dictEnd - p);
|
||||
dict->tableType = tableType;
|
||||
|
||||
while (p <= dictEnd-HASH_UNIT) {
|
||||
LZ4_putPosition(p, dict->hashTable, tableType, base);
|
||||
p+=3;
|
||||
@ -1381,26 +1428,37 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
|
||||
return (int)dict->dictSize;
|
||||
}
|
||||
|
||||
void LZ4_attach_dictionary(LZ4_stream_t *working_stream, const LZ4_stream_t *dictionary_stream) {
|
||||
void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream) {
|
||||
const LZ4_stream_t_internal* dictCtx = dictionaryStream == NULL ? NULL :
|
||||
&(dictionaryStream->internal_donotuse);
|
||||
|
||||
DEBUGLOG(4, "LZ4_attach_dictionary (%p, %p, size %u)",
|
||||
workingStream, dictionaryStream,
|
||||
dictCtx != NULL ? dictCtx->dictSize : 0);
|
||||
|
||||
/* Calling LZ4_resetStream_fast() here makes sure that changes will not be
|
||||
* erased by subsequent calls to LZ4_resetStream_fast() in case stream was
|
||||
* marked as having dirty context, e.g. requiring full reset.
|
||||
*/
|
||||
LZ4_resetStream_fast(working_stream);
|
||||
LZ4_resetStream_fast(workingStream);
|
||||
|
||||
if (dictionary_stream != NULL) {
|
||||
if (dictCtx != NULL) {
|
||||
/* If the current offset is zero, we will never look in the
|
||||
* external dictionary context, since there is no value a table
|
||||
* entry can take that indicate a miss. In that case, we need
|
||||
* to bump the offset to something non-zero.
|
||||
*/
|
||||
if (working_stream->internal_donotuse.currentOffset == 0) {
|
||||
working_stream->internal_donotuse.currentOffset = 64 KB;
|
||||
if (workingStream->internal_donotuse.currentOffset == 0) {
|
||||
workingStream->internal_donotuse.currentOffset = 64 KB;
|
||||
}
|
||||
|
||||
/* Don't actually attach an empty dictionary.
|
||||
*/
|
||||
if (dictCtx->dictSize == 0) {
|
||||
dictCtx = NULL;
|
||||
}
|
||||
working_stream->internal_donotuse.dictCtx = &(dictionary_stream->internal_donotuse);
|
||||
} else {
|
||||
working_stream->internal_donotuse.dictCtx = NULL;
|
||||
}
|
||||
workingStream->internal_donotuse.dictCtx = dictCtx;
|
||||
}
|
||||
|
||||
|
||||
@ -1435,7 +1493,7 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
|
||||
|
||||
DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
|
||||
|
||||
if (streamPtr->dirty) return 0; /* Uninitialized structure detected */
|
||||
if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */
|
||||
LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */
|
||||
if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
|
||||
|
||||
@ -1532,8 +1590,8 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
|
||||
LZ4_stream_t_internal* const dict = &LZ4_dict->internal_donotuse;
|
||||
const BYTE* const previousDictEnd = dict->dictionary + dict->dictSize;
|
||||
|
||||
if ((U32)dictSize > 64 KB) dictSize = 64 KB; /* useless to define a dictionary > 64 KB */
|
||||
if ((U32)dictSize > dict->dictSize) dictSize = (int)dict->dictSize;
|
||||
if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
|
||||
if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
|
||||
|
||||
memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
|
||||
|
||||
@ -1607,7 +1665,7 @@ LZ4_decompress_generic(
|
||||
const size_t dictSize /* note : = 0 if noDict */
|
||||
)
|
||||
{
|
||||
if (src == NULL) return -1;
|
||||
if (src == NULL) { return -1; }
|
||||
|
||||
{ const BYTE* ip = (const BYTE*) src;
|
||||
const BYTE* const iend = ip + srcSize;
|
||||
@ -1636,9 +1694,13 @@ LZ4_decompress_generic(
|
||||
|
||||
/* Special cases */
|
||||
assert(lowPrefix <= op);
|
||||
if ((endOnInput) && (unlikely(outputSize==0))) return ((srcSize==1) && (*ip==0)) ? 0 : -1; /* Empty output buffer */
|
||||
if ((!endOnInput) && (unlikely(outputSize==0))) return (*ip==0 ? 1 : -1);
|
||||
if ((endOnInput) && unlikely(srcSize==0)) return -1;
|
||||
if ((endOnInput) && (unlikely(outputSize==0))) {
|
||||
/* Empty output buffer */
|
||||
if (partialDecoding) return 0;
|
||||
return ((srcSize==1) && (*ip==0)) ? 0 : -1;
|
||||
}
|
||||
if ((!endOnInput) && (unlikely(outputSize==0))) { return (*ip==0 ? 1 : -1); }
|
||||
if ((endOnInput) && unlikely(srcSize==0)) { return -1; }
|
||||
|
||||
/* Currently the fast loop shows a regression on qualcomm arm chips. */
|
||||
#if LZ4_FAST_DEC_LOOP
|
||||
@ -1651,7 +1713,7 @@ LZ4_decompress_generic(
|
||||
while (1) {
|
||||
/* Main fastloop assertion: We can always wildcopy FASTLOOP_SAFE_DISTANCE */
|
||||
assert(oend - op >= FASTLOOP_SAFE_DISTANCE);
|
||||
if (endOnInput) assert(ip < iend);
|
||||
if (endOnInput) { assert(ip < iend); }
|
||||
token = *ip++;
|
||||
length = token >> ML_BITS; /* literal length */
|
||||
|
||||
@ -1661,18 +1723,18 @@ LZ4_decompress_generic(
|
||||
if (length == RUN_MASK) {
|
||||
variable_length_error error = ok;
|
||||
length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
|
||||
if (error == initial_error) goto _output_error;
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error; /* overflow detection */
|
||||
if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error; /* overflow detection */
|
||||
if (error == initial_error) { goto _output_error; }
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
|
||||
if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
|
||||
|
||||
/* copy literals */
|
||||
cpy = op+length;
|
||||
LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);
|
||||
if (endOnInput) { /* LZ4_decompress_safe() */
|
||||
if ((cpy>oend-32) || (ip+length>iend-32)) goto safe_literal_copy;
|
||||
if ((cpy>oend-32) || (ip+length>iend-32)) { goto safe_literal_copy; }
|
||||
LZ4_wildCopy32(op, ip, cpy);
|
||||
} else { /* LZ4_decompress_fast() */
|
||||
if (cpy>oend-8) goto safe_literal_copy;
|
||||
if (cpy>oend-8) { goto safe_literal_copy; }
|
||||
LZ4_wildCopy8(op, ip, cpy); /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
|
||||
* it doesn't know input length, and only relies on end-of-block properties */
|
||||
}
|
||||
@ -1682,14 +1744,14 @@ LZ4_decompress_generic(
|
||||
if (endOnInput) { /* LZ4_decompress_safe() */
|
||||
DEBUGLOG(7, "copy %u bytes in a 16-bytes stripe", (unsigned)length);
|
||||
/* We don't need to check oend, since we check it once for each loop below */
|
||||
if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) goto safe_literal_copy;
|
||||
if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
|
||||
/* Literals can only be 14, but hope compilers optimize if we copy by a register size */
|
||||
memcpy(op, ip, 16);
|
||||
} else { /* LZ4_decompress_fast() */
|
||||
/* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
|
||||
* it doesn't know input length, and relies on end-of-block properties */
|
||||
memcpy(op, ip, 8);
|
||||
if (length > 8) memcpy(op+8, ip+8, 8);
|
||||
if (length > 8) { memcpy(op+8, ip+8, 8); }
|
||||
}
|
||||
ip += length; op = cpy;
|
||||
}
|
||||
@ -1697,17 +1759,17 @@ LZ4_decompress_generic(
|
||||
/* get offset */
|
||||
offset = LZ4_readLE16(ip); ip+=2;
|
||||
match = op - offset;
|
||||
assert(match <= op);
|
||||
|
||||
/* get matchlength */
|
||||
length = token & ML_MASK;
|
||||
|
||||
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
|
||||
|
||||
if (length == ML_MASK) {
|
||||
variable_length_error error = ok;
|
||||
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
|
||||
length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
|
||||
if (error != ok) goto _output_error;
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
|
||||
if (error != ok) { goto _output_error; }
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
|
||||
length += MINMATCH;
|
||||
if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
|
||||
goto safe_match_copy;
|
||||
@ -1719,8 +1781,12 @@ LZ4_decompress_generic(
|
||||
}
|
||||
|
||||
/* Fastpath check: Avoids a branch in LZ4_wildCopy32 if true */
|
||||
if (!(dict == usingExtDict) || (match >= lowPrefix)) {
|
||||
if ((dict == withPrefix64k) || (match >= lowPrefix)) {
|
||||
if (offset >= 8) {
|
||||
assert(match >= lowPrefix);
|
||||
assert(match <= op);
|
||||
assert(op + 18 <= oend);
|
||||
|
||||
memcpy(op, match, 8);
|
||||
memcpy(op+8, match+8, 8);
|
||||
memcpy(op+16, match+16, 2);
|
||||
@ -1728,12 +1794,16 @@ LZ4_decompress_generic(
|
||||
continue;
|
||||
} } }
|
||||
|
||||
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
|
||||
/* match starting within external dictionary */
|
||||
if ((dict==usingExtDict) && (match < lowPrefix)) {
|
||||
if (unlikely(op+length > oend-LASTLITERALS)) {
|
||||
if (partialDecoding) length = MIN(length, (size_t)(oend-op));
|
||||
else goto _output_error; /* doesn't respect parsing restriction */
|
||||
}
|
||||
if (partialDecoding) {
|
||||
DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
|
||||
length = MIN(length, (size_t)(oend-op));
|
||||
} else {
|
||||
goto _output_error; /* end-of-block condition violated */
|
||||
} }
|
||||
|
||||
if (length <= (size_t)(lowPrefix-match)) {
|
||||
/* match fits entirely within external dictionary : just copy */
|
||||
@ -1748,7 +1818,7 @@ LZ4_decompress_generic(
|
||||
if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
|
||||
BYTE* const endOfMatch = op + restSize;
|
||||
const BYTE* copyFrom = lowPrefix;
|
||||
while (op < endOfMatch) *op++ = *copyFrom++;
|
||||
while (op < endOfMatch) { *op++ = *copyFrom++; }
|
||||
} else {
|
||||
memcpy(op, lowPrefix, restSize);
|
||||
op += restSize;
|
||||
@ -1821,11 +1891,11 @@ LZ4_decompress_generic(
|
||||
|
||||
/* decode literal length */
|
||||
if (length == RUN_MASK) {
|
||||
variable_length_error error = ok;
|
||||
length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
|
||||
if (error == initial_error) goto _output_error;
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) goto _output_error; /* overflow detection */
|
||||
if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) goto _output_error; /* overflow detection */
|
||||
variable_length_error error = ok;
|
||||
length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
|
||||
if (error == initial_error) { goto _output_error; }
|
||||
if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
|
||||
if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
|
||||
}
|
||||
|
||||
/* copy literals */
|
||||
@ -1837,21 +1907,56 @@ LZ4_decompress_generic(
|
||||
if ( ((endOnInput) && ((cpy>oend-MFLIMIT) || (ip+length>iend-(2+1+LASTLITERALS))) )
|
||||
|| ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
|
||||
{
|
||||
/* We've either hit the input parsing restriction or the output parsing restriction.
|
||||
* In the normal scenario, decoding a full block, it must be the last sequence,
|
||||
* otherwise it's an error (invalid input or dimensions).
|
||||
* In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
|
||||
*/
|
||||
if (partialDecoding) {
|
||||
if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); } /* Partial decoding : stop in the middle of literal segment */
|
||||
if ((endOnInput) && (ip+length > iend)) goto _output_error; /* Error : read attempt beyond end of input buffer */
|
||||
/* Since we are partial decoding we may be in this block because of the output parsing
|
||||
* restriction, which is not valid since the output buffer is allowed to be undersized.
|
||||
*/
|
||||
assert(endOnInput);
|
||||
DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
|
||||
DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
|
||||
DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
|
||||
DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
|
||||
/* Finishing in the middle of a literals segment,
|
||||
* due to lack of input.
|
||||
*/
|
||||
if (ip+length > iend) {
|
||||
length = (size_t)(iend-ip);
|
||||
cpy = op + length;
|
||||
}
|
||||
/* Finishing in the middle of a literals segment,
|
||||
* due to lack of output space.
|
||||
*/
|
||||
if (cpy > oend) {
|
||||
cpy = oend;
|
||||
assert(op<=oend);
|
||||
length = (size_t)(oend-op);
|
||||
}
|
||||
} else {
|
||||
if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */
|
||||
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */
|
||||
/* We must be on the last sequence because of the parsing limitations so check
|
||||
* that we exactly regenerate the original size (must be exact when !endOnInput).
|
||||
*/
|
||||
if ((!endOnInput) && (cpy != oend)) { goto _output_error; }
|
||||
/* We must be on the last sequence (or invalid) because of the parsing limitations
|
||||
* so check that we exactly consume the input and don't overrun the output buffer.
|
||||
*/
|
||||
if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
|
||||
}
|
||||
memcpy(op, ip, length);
|
||||
memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
|
||||
ip += length;
|
||||
op += length;
|
||||
if (!partialDecoding || (cpy == oend)) {
|
||||
/* Necessarily EOF, due to parsing restrictions */
|
||||
/* Necessarily EOF when !partialDecoding.
|
||||
* When partialDecoding, it is EOF if we've either
|
||||
* filled the output buffer or
|
||||
* can't proceed with reading an offset for following match.
|
||||
*/
|
||||
if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
|
||||
break;
|
||||
}
|
||||
|
||||
} else {
|
||||
LZ4_wildCopy8(op, ip, cpy); /* may overwrite up to WILDCOPYLENGTH beyond cpy */
|
||||
ip += length; op = cpy;
|
||||
@ -1865,13 +1970,6 @@ LZ4_decompress_generic(
|
||||
length = token & ML_MASK;
|
||||
|
||||
_copy_match:
|
||||
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
|
||||
if (!partialDecoding) {
|
||||
assert(oend > op);
|
||||
assert(oend - op >= 4);
|
||||
LZ4_write32(op, 0); /* silence an msan warning when offset==0; costs <1%; */
|
||||
} /* note : when partialDecoding, there is no guarantee that at least 4 bytes remain available in output buffer */
|
||||
|
||||
if (length == ML_MASK) {
|
||||
variable_length_error error = ok;
|
||||
length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
|
||||
@ -1883,6 +1981,7 @@ LZ4_decompress_generic(
|
||||
#if LZ4_FAST_DEC_LOOP
|
||||
safe_match_copy:
|
||||
#endif
|
||||
if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) goto _output_error; /* Error : offset outside buffers */
|
||||
/* match starting within external dictionary */
|
||||
if ((dict==usingExtDict) && (match < lowPrefix)) {
|
||||
if (unlikely(op+length > oend-LASTLITERALS)) {
|
||||
@ -1910,6 +2009,7 @@ LZ4_decompress_generic(
|
||||
} }
|
||||
continue;
|
||||
}
|
||||
assert(match >= lowPrefix);
|
||||
|
||||
/* copy match within block */
|
||||
cpy = op + length;
|
||||
@ -1921,16 +2021,17 @@ LZ4_decompress_generic(
|
||||
const BYTE* const matchEnd = match + mlen;
|
||||
BYTE* const copyEnd = op + mlen;
|
||||
if (matchEnd > op) { /* overlap copy */
|
||||
while (op < copyEnd) *op++ = *match++;
|
||||
while (op < copyEnd) { *op++ = *match++; }
|
||||
} else {
|
||||
memcpy(op, match, mlen);
|
||||
}
|
||||
op = copyEnd;
|
||||
if (op==oend) break;
|
||||
if (op == oend) { break; }
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(offset<8)) {
|
||||
LZ4_write32(op, 0); /* silence msan warning when offset==0 */
|
||||
op[0] = match[0];
|
||||
op[1] = match[1];
|
||||
op[2] = match[2];
|
||||
@ -1946,25 +2047,26 @@ LZ4_decompress_generic(
|
||||
|
||||
if (unlikely(cpy > oend-MATCH_SAFEGUARD_DISTANCE)) {
|
||||
BYTE* const oCopyLimit = oend - (WILDCOPYLENGTH-1);
|
||||
if (cpy > oend-LASTLITERALS) goto _output_error; /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
|
||||
if (cpy > oend-LASTLITERALS) { goto _output_error; } /* Error : last LASTLITERALS bytes must be literals (uncompressed) */
|
||||
if (op < oCopyLimit) {
|
||||
LZ4_wildCopy8(op, match, oCopyLimit);
|
||||
match += oCopyLimit - op;
|
||||
op = oCopyLimit;
|
||||
}
|
||||
while (op < cpy) *op++ = *match++;
|
||||
while (op < cpy) { *op++ = *match++; }
|
||||
} else {
|
||||
memcpy(op, match, 8);
|
||||
if (length > 16) LZ4_wildCopy8(op+8, match+8, cpy);
|
||||
if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
|
||||
}
|
||||
op = cpy; /* wildcopy correction */
|
||||
}
|
||||
|
||||
/* end of decoding */
|
||||
if (endOnInput)
|
||||
if (endOnInput) {
|
||||
return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
|
||||
else
|
||||
} else {
|
||||
return (int) (((const char*)ip)-src); /* Nb of input bytes read */
|
||||
}
|
||||
|
||||
/* Overflow error detected */
|
||||
_output_error:
|
||||
@ -2079,7 +2181,7 @@ LZ4_streamDecode_t* LZ4_createStreamDecode(void)
|
||||
|
||||
int LZ4_freeStreamDecode (LZ4_streamDecode_t* LZ4_stream)
|
||||
{
|
||||
if (LZ4_stream == NULL) return 0; /* support free on NULL */
|
||||
if (LZ4_stream == NULL) { return 0; } /* support free on NULL */
|
||||
FREEMEM(LZ4_stream);
|
||||
return 0;
|
||||
}
|
||||
@ -2214,18 +2316,22 @@ int LZ4_decompress_safe_usingDict(const char* source, char* dest, int compressed
|
||||
if (dictSize==0)
|
||||
return LZ4_decompress_safe(source, dest, compressedSize, maxOutputSize);
|
||||
if (dictStart+dictSize == dest) {
|
||||
if (dictSize >= 64 KB - 1)
|
||||
if (dictSize >= 64 KB - 1) {
|
||||
return LZ4_decompress_safe_withPrefix64k(source, dest, compressedSize, maxOutputSize);
|
||||
return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, dictSize);
|
||||
}
|
||||
assert(dictSize >= 0);
|
||||
return LZ4_decompress_safe_withSmallPrefix(source, dest, compressedSize, maxOutputSize, (size_t)dictSize);
|
||||
}
|
||||
return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, dictSize);
|
||||
assert(dictSize >= 0);
|
||||
return LZ4_decompress_safe_forceExtDict(source, dest, compressedSize, maxOutputSize, dictStart, (size_t)dictSize);
|
||||
}
|
||||
|
||||
int LZ4_decompress_fast_usingDict(const char* source, char* dest, int originalSize, const char* dictStart, int dictSize)
|
||||
{
|
||||
if (dictSize==0 || dictStart+dictSize == dest)
|
||||
return LZ4_decompress_fast(source, dest, originalSize);
|
||||
return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, dictSize);
|
||||
assert(dictSize >= 0);
|
||||
return LZ4_decompress_fast_extDict(source, dest, originalSize, dictStart, (size_t)dictSize);
|
||||
}
|
||||
|
||||
|
||||
@ -2237,9 +2343,9 @@ int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, in
|
||||
{
|
||||
return LZ4_compress_default(source, dest, inputSize, maxOutputSize);
|
||||
}
|
||||
int LZ4_compress(const char* source, char* dest, int inputSize)
|
||||
int LZ4_compress(const char* src, char* dest, int srcSize)
|
||||
{
|
||||
return LZ4_compress_default(source, dest, inputSize, LZ4_compressBound(inputSize));
|
||||
return LZ4_compress_default(src, dest, srcSize, LZ4_compressBound(srcSize));
|
||||
}
|
||||
int LZ4_compress_limitedOutput_withState (void* state, const char* src, char* dst, int srcSize, int dstSize)
|
||||
{
|
||||
|
176
mfbt/lz4/lz4.h
176
mfbt/lz4/lz4.h
@ -46,7 +46,7 @@ extern "C" {
|
||||
/**
|
||||
Introduction
|
||||
|
||||
LZ4 is lossless compression algorithm, providing compression speed at 500 MB/s per core,
|
||||
LZ4 is lossless compression algorithm, providing compression speed >500 MB/s per core,
|
||||
scalable with multi-cores CPU. It features an extremely fast decoder, with speed in
|
||||
multiple GB/s per core, typically reaching RAM speed limits on multi-core systems.
|
||||
|
||||
@ -58,16 +58,19 @@ extern "C" {
|
||||
- unbounded multiple steps (described as Streaming compression)
|
||||
|
||||
lz4.h generates and decodes LZ4-compressed blocks (doc/lz4_Block_format.md).
|
||||
Decompressing a block requires additional metadata, such as its compressed size.
|
||||
Decompressing such a compressed block requires additional metadata.
|
||||
Exact metadata depends on exact decompression function.
|
||||
For the typical case of LZ4_decompress_safe(),
|
||||
metadata includes block's compressed size, and maximum bound of decompressed size.
|
||||
Each application is free to encode and pass such metadata in whichever way it wants.
|
||||
|
||||
lz4.h only handle blocks, it can not generate Frames.
|
||||
|
||||
Blocks are different from Frames (doc/lz4_Frame_format.md).
|
||||
Frames bundle both blocks and metadata in a specified manner.
|
||||
This are required for compressed data to be self-contained and portable.
|
||||
Embedding metadata is required for compressed data to be self-contained and portable.
|
||||
Frame format is delivered through a companion API, declared in lz4frame.h.
|
||||
Note that the `lz4` CLI can only manage frames.
|
||||
The `lz4` CLI can only manage frames.
|
||||
*/
|
||||
|
||||
/*^***************************************************************
|
||||
@ -97,7 +100,7 @@ extern "C" {
|
||||
/*------ Version ------*/
|
||||
#define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */
|
||||
#define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */
|
||||
#define LZ4_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
|
||||
#define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
|
||||
|
||||
#define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE)
|
||||
|
||||
@ -129,29 +132,35 @@ LZ4LIB_API const char* LZ4_versionString (void); /**< library version string;
|
||||
* Simple Functions
|
||||
**************************************/
|
||||
/*! LZ4_compress_default() :
|
||||
Compresses 'srcSize' bytes from buffer 'src'
|
||||
into already allocated 'dst' buffer of size 'dstCapacity'.
|
||||
Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
|
||||
It also runs faster, so it's a recommended setting.
|
||||
If the function cannot compress 'src' into a more limited 'dst' budget,
|
||||
compression stops *immediately*, and the function result is zero.
|
||||
In which case, 'dst' content is undefined (invalid).
|
||||
srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
|
||||
dstCapacity : size of buffer 'dst' (which must be already allocated)
|
||||
@return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
|
||||
or 0 if compression fails
|
||||
Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
|
||||
*/
|
||||
* Compresses 'srcSize' bytes from buffer 'src'
|
||||
* into already allocated 'dst' buffer of size 'dstCapacity'.
|
||||
* Compression is guaranteed to succeed if 'dstCapacity' >= LZ4_compressBound(srcSize).
|
||||
* It also runs faster, so it's a recommended setting.
|
||||
* If the function cannot compress 'src' into a more limited 'dst' budget,
|
||||
* compression stops *immediately*, and the function result is zero.
|
||||
* In which case, 'dst' content is undefined (invalid).
|
||||
* srcSize : max supported value is LZ4_MAX_INPUT_SIZE.
|
||||
* dstCapacity : size of buffer 'dst' (which must be already allocated)
|
||||
* @return : the number of bytes written into buffer 'dst' (necessarily <= dstCapacity)
|
||||
* or 0 if compression fails
|
||||
* Note : This function is protected against buffer overflow scenarios (never writes outside 'dst' buffer, nor read outside 'source' buffer).
|
||||
*/
|
||||
LZ4LIB_API int LZ4_compress_default(const char* src, char* dst, int srcSize, int dstCapacity);
|
||||
|
||||
/*! LZ4_decompress_safe() :
|
||||
compressedSize : is the exact complete size of the compressed block.
|
||||
dstCapacity : is the size of destination buffer, which must be already allocated.
|
||||
@return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
|
||||
If destination buffer is not large enough, decoding will stop and output an error code (negative value).
|
||||
If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
Note : This function is protected against malicious data packets (never writes outside 'dst' buffer, nor read outside 'source' buffer).
|
||||
*/
|
||||
* compressedSize : is the exact complete size of the compressed block.
|
||||
* dstCapacity : is the size of destination buffer (which must be already allocated), presumed an upper bound of decompressed size.
|
||||
* @return : the number of bytes decompressed into destination buffer (necessarily <= dstCapacity)
|
||||
* If destination buffer is not large enough, decoding will stop and output an error code (negative value).
|
||||
* If the source stream is detected malformed, the function will stop decoding and return a negative result.
|
||||
* Note 1 : This function is protected against malicious data packets :
|
||||
* it will never writes outside 'dst' buffer, nor read outside 'source' buffer,
|
||||
* even if the compressed block is maliciously modified to order the decoder to do these actions.
|
||||
* In such case, the decoder stops immediately, and considers the compressed block malformed.
|
||||
* Note 2 : compressedSize and dstCapacity must be provided to the function, the compressed block does not contain them.
|
||||
* The implementation is free to send / store / derive this information in whichever way is most beneficial.
|
||||
* If there is a need for a different format which bundles together both compressed data and its metadata, consider looking at lz4frame.h instead.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe (const char* src, char* dst, int compressedSize, int dstCapacity);
|
||||
|
||||
|
||||
@ -211,25 +220,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt
|
||||
* Decompress an LZ4 compressed block, of size 'srcSize' at position 'src',
|
||||
* into destination buffer 'dst' of size 'dstCapacity'.
|
||||
* Up to 'targetOutputSize' bytes will be decoded.
|
||||
* The function stops decoding on reaching this objective,
|
||||
* which can boost performance when only the beginning of a block is required.
|
||||
* The function stops decoding on reaching this objective.
|
||||
* This can be useful to boost performance
|
||||
* whenever only the beginning of a block is required.
|
||||
*
|
||||
* @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity)
|
||||
* @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize)
|
||||
* If source stream is detected malformed, function returns a negative result.
|
||||
*
|
||||
* Note : @return can be < targetOutputSize, if compressed block contains less data.
|
||||
* Note 1 : @return can be < targetOutputSize, if compressed block contains less data.
|
||||
*
|
||||
* Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity,
|
||||
* and expects targetOutputSize <= dstCapacity.
|
||||
* It effectively stops decoding on reaching targetOutputSize,
|
||||
* Note 2 : targetOutputSize must be <= dstCapacity
|
||||
*
|
||||
* Note 3 : this function effectively stops decoding on reaching targetOutputSize,
|
||||
* so dstCapacity is kind of redundant.
|
||||
* This is because in a previous version of this function,
|
||||
* decoding operation would not "break" a sequence in the middle.
|
||||
* As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize,
|
||||
* This is because in older versions of this function,
|
||||
* decoding operation would still write complete sequences.
|
||||
* Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize,
|
||||
* it could write more bytes, though only up to dstCapacity.
|
||||
* Some "margin" used to be required for this operation to work properly.
|
||||
* This is no longer necessary.
|
||||
* The function nonetheless keeps its signature, in an effort to not break API.
|
||||
* Thankfully, this is no longer necessary.
|
||||
* The function nonetheless keeps the same signature, in an effort to preserve API compatibility.
|
||||
*
|
||||
* Note 4 : If srcSize is the exact size of the block,
|
||||
* then targetOutputSize can be any value,
|
||||
* including larger than the block's decompressed size.
|
||||
* The function will, at most, generate block's decompressed size.
|
||||
*
|
||||
* Note 5 : If srcSize is _larger_ than block's compressed size,
|
||||
* then targetOutputSize **MUST** be <= block's decompressed size.
|
||||
* Otherwise, *silent corruption will occur*.
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);
|
||||
|
||||
@ -388,6 +407,8 @@ LZ4LIB_API int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecod
|
||||
*/
|
||||
LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int srcSize, int dstCapcity, const char* dictStart, int dictSize);
|
||||
|
||||
#endif /* LZ4_H_2983827168210 */
|
||||
|
||||
|
||||
/*^*************************************
|
||||
* !!!!!! STATIC LINKING ONLY !!!!!!
|
||||
@ -413,14 +434,17 @@ LZ4LIB_API int LZ4_decompress_safe_usingDict (const char* src, char* dst, int sr
|
||||
* define LZ4_PUBLISH_STATIC_FUNCTIONS when building the LZ4 library.
|
||||
******************************************************************************/
|
||||
|
||||
#ifdef LZ4_STATIC_LINKING_ONLY
|
||||
|
||||
#ifndef LZ4_STATIC_3504398509
|
||||
#define LZ4_STATIC_3504398509
|
||||
|
||||
#ifdef LZ4_PUBLISH_STATIC_FUNCTIONS
|
||||
#define LZ4LIB_STATIC_API LZ4LIB_API
|
||||
#else
|
||||
#define LZ4LIB_STATIC_API
|
||||
#endif
|
||||
|
||||
#ifdef LZ4_STATIC_LINKING_ONLY
|
||||
|
||||
|
||||
/*! LZ4_compress_fast_extState_fastReset() :
|
||||
* A variant of LZ4_compress_fast_extState().
|
||||
@ -462,8 +486,75 @@ LZ4LIB_STATIC_API int LZ4_compress_fast_extState_fastReset (void* state, const c
|
||||
*/
|
||||
LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dictionaryStream);
|
||||
|
||||
|
||||
/*! In-place compression and decompression
|
||||
*
|
||||
* It's possible to have input and output sharing the same buffer,
|
||||
* for highly contrained memory environments.
|
||||
* In both cases, it requires input to lay at the end of the buffer,
|
||||
* and decompression to start at beginning of the buffer.
|
||||
* Buffer size must feature some margin, hence be larger than final size.
|
||||
*
|
||||
* |<------------------------buffer--------------------------------->|
|
||||
* |<-----------compressed data--------->|
|
||||
* |<-----------decompressed size------------------>|
|
||||
* |<----margin---->|
|
||||
*
|
||||
* This technique is more useful for decompression,
|
||||
* since decompressed size is typically larger,
|
||||
* and margin is short.
|
||||
*
|
||||
* In-place decompression will work inside any buffer
|
||||
* which size is >= LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize).
|
||||
* This presumes that decompressedSize > compressedSize.
|
||||
* Otherwise, it means compression actually expanded data,
|
||||
* and it would be more efficient to store such data with a flag indicating it's not compressed.
|
||||
* This can happen when data is not compressible (already compressed, or encrypted).
|
||||
*
|
||||
* For in-place compression, margin is larger, as it must be able to cope with both
|
||||
* history preservation, requiring input data to remain unmodified up to LZ4_DISTANCE_MAX,
|
||||
* and data expansion, which can happen when input is not compressible.
|
||||
* As a consequence, buffer size requirements are much higher,
|
||||
* and memory savings offered by in-place compression are more limited.
|
||||
*
|
||||
* There are ways to limit this cost for compression :
|
||||
* - Reduce history size, by modifying LZ4_DISTANCE_MAX.
|
||||
* Note that it is a compile-time constant, so all compressions will apply this limit.
|
||||
* Lower values will reduce compression ratio, except when input_size < LZ4_DISTANCE_MAX,
|
||||
* so it's a reasonable trick when inputs are known to be small.
|
||||
* - Require the compressor to deliver a "maximum compressed size".
|
||||
* This is the `dstCapacity` parameter in `LZ4_compress*()`.
|
||||
* When this size is < LZ4_COMPRESSBOUND(inputSize), then compression can fail,
|
||||
* in which case, the return code will be 0 (zero).
|
||||
* The caller must be ready for these cases to happen,
|
||||
* and typically design a backup scheme to send data uncompressed.
|
||||
* The combination of both techniques can significantly reduce
|
||||
* the amount of margin required for in-place compression.
|
||||
*
|
||||
* In-place compression can work in any buffer
|
||||
* which size is >= (maxCompressedSize)
|
||||
* with maxCompressedSize == LZ4_COMPRESSBOUND(srcSize) for guaranteed compression success.
|
||||
* LZ4_COMPRESS_INPLACE_BUFFER_SIZE() depends on both maxCompressedSize and LZ4_DISTANCE_MAX,
|
||||
* so it's possible to reduce memory requirements by playing with them.
|
||||
*/
|
||||
|
||||
#define LZ4_DECOMPRESS_INPLACE_MARGIN(compressedSize) (((compressedSize) >> 8) + 32)
|
||||
#define LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(decompressedSize) ((decompressedSize) + LZ4_DECOMPRESS_INPLACE_MARGIN(decompressedSize)) /**< note: presumes that compressedSize < decompressedSize. note2: margin is overestimated a bit, since it could use compressedSize instead */
|
||||
|
||||
#ifndef LZ4_DISTANCE_MAX /* history window size; can be user-defined at compile time */
|
||||
# define LZ4_DISTANCE_MAX 65535 /* set to maximum value by default */
|
||||
#endif
|
||||
|
||||
#define LZ4_COMPRESS_INPLACE_MARGIN (LZ4_DISTANCE_MAX + 32) /* LZ4_DISTANCE_MAX can be safely replaced by srcSize when it's smaller */
|
||||
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
|
||||
|
||||
#endif /* LZ4_STATIC_3504398509 */
|
||||
#endif /* LZ4_STATIC_LINKING_ONLY */
|
||||
|
||||
|
||||
|
||||
#ifndef LZ4_H_98237428734687
|
||||
#define LZ4_H_98237428734687
|
||||
|
||||
/*-************************************************************
|
||||
* PRIVATE DEFINITIONS
|
||||
@ -567,6 +658,7 @@ union LZ4_streamDecode_u {
|
||||
} ; /* previously typedef'd to LZ4_streamDecode_t */
|
||||
|
||||
|
||||
|
||||
/*-************************************
|
||||
* Obsolete Functions
|
||||
**************************************/
|
||||
@ -601,8 +693,8 @@ union LZ4_streamDecode_u {
|
||||
#endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */
|
||||
|
||||
/* Obsolete compression functions */
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* source, char* dest, int sourceSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* source, char* dest, int sourceSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize);
|
||||
LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize);
|
||||
@ -674,7 +766,7 @@ LZ4LIB_API int LZ4_decompress_fast_usingDict (const char* src, char* dst, int or
|
||||
LZ4LIB_API void LZ4_resetStream (LZ4_stream_t* streamPtr);
|
||||
|
||||
|
||||
#endif /* LZ4_H_2983827168210 */
|
||||
#endif /* LZ4_H_98237428734687 */
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
|
@ -213,8 +213,8 @@ static void LZ4F_writeLE64 (void* dst, U64 value64)
|
||||
|
||||
static const size_t minFHSize = LZ4F_HEADER_SIZE_MIN; /* 7 */
|
||||
static const size_t maxFHSize = LZ4F_HEADER_SIZE_MAX; /* 19 */
|
||||
static const size_t BHSize = 4; /* block header : size, and compress flag */
|
||||
static const size_t BFSize = 4; /* block footer : checksum (optional) */
|
||||
static const size_t BHSize = LZ4F_BLOCK_HEADER_SIZE; /* block header : size, and compress flag */
|
||||
static const size_t BFSize = LZ4F_BLOCK_CHECKSUM_SIZE; /* block footer : checksum (optional) */
|
||||
|
||||
|
||||
/*-************************************
|
||||
@ -327,6 +327,7 @@ static size_t LZ4F_compressBound_internal(size_t srcSize,
|
||||
{
|
||||
LZ4F_preferences_t prefsNull = LZ4F_INIT_PREFERENCES;
|
||||
prefsNull.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; /* worst case */
|
||||
prefsNull.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; /* worst case */
|
||||
{ const LZ4F_preferences_t* const prefsPtr = (preferencesPtr==NULL) ? &prefsNull : preferencesPtr;
|
||||
U32 const flush = prefsPtr->autoFlush | (srcSize==0);
|
||||
LZ4F_blockSizeID_t const blockID = prefsPtr->frameInfo.blockSizeID;
|
||||
@ -1130,8 +1131,10 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize
|
||||
}
|
||||
|
||||
/* control magic number */
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER)
|
||||
return err0r(LZ4F_ERROR_frameType_unknown);
|
||||
#endif
|
||||
dctx->frameInfo.frameType = LZ4F_frame;
|
||||
|
||||
/* Flags */
|
||||
@ -1170,10 +1173,12 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize
|
||||
|
||||
/* check header */
|
||||
assert(frameHeaderSize > 5);
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
{ BYTE const HC = LZ4F_headerChecksum(srcPtr+4, frameHeaderSize-5);
|
||||
if (HC != srcPtr[frameHeaderSize-1])
|
||||
return err0r(LZ4F_ERROR_headerChecksum_invalid);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* save */
|
||||
dctx->frameInfo.blockMode = (LZ4F_blockMode_t)blockMode;
|
||||
@ -1210,8 +1215,10 @@ size_t LZ4F_headerSize(const void* src, size_t srcSize)
|
||||
return 8;
|
||||
|
||||
/* control magic number */
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
if (LZ4F_readLE32(src) != LZ4F_MAGICNUMBER)
|
||||
return err0r(LZ4F_ERROR_frameType_unknown);
|
||||
#endif
|
||||
|
||||
/* Frame Header Size */
|
||||
{ BYTE const FLG = ((const BYTE*)src)[4];
|
||||
@ -1493,7 +1500,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
|
||||
/* next block is a compressed block */
|
||||
dctx->tmpInTarget = nextCBlockSize + crcSize;
|
||||
dctx->dStage = dstage_getCBlock;
|
||||
if (dstPtr==dstEnd) {
|
||||
if (dstPtr==dstEnd || srcPtr==srcEnd) {
|
||||
nextSrcSizeHint = BHSize + nextCBlockSize + crcSize;
|
||||
doAnotherStage = 0;
|
||||
}
|
||||
@ -1554,8 +1561,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
|
||||
}
|
||||
{ U32 const readCRC = LZ4F_readLE32(crcSrc);
|
||||
U32 const calcCRC = XXH32_digest(&dctx->blockChecksum);
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
if (readCRC != calcCRC)
|
||||
return err0r(LZ4F_ERROR_blockChecksum_invalid);
|
||||
#else
|
||||
(void)readCRC;
|
||||
(void)calcCRC;
|
||||
#endif
|
||||
} }
|
||||
dctx->dStage = dstage_getBlockHeader; /* new block */
|
||||
break;
|
||||
@ -1594,8 +1606,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
|
||||
assert(selectedIn != NULL); /* selectedIn is defined at this stage (either srcPtr, or dctx->tmpIn) */
|
||||
{ U32 const readBlockCrc = LZ4F_readLE32(selectedIn + dctx->tmpInTarget);
|
||||
U32 const calcBlockCrc = XXH32(selectedIn, dctx->tmpInTarget, 0);
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
if (readBlockCrc != calcBlockCrc)
|
||||
return err0r(LZ4F_ERROR_blockChecksum_invalid);
|
||||
#else
|
||||
(void)readBlockCrc;
|
||||
(void)calcBlockCrc;
|
||||
#endif
|
||||
} }
|
||||
|
||||
if ((size_t)(dstEnd-dstPtr) >= dctx->maxBlockSize) {
|
||||
@ -1723,8 +1740,13 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx,
|
||||
/* case dstage_checkSuffix: */ /* no direct entry, avoid initialization risks */
|
||||
{ U32 const readCRC = LZ4F_readLE32(selectedIn);
|
||||
U32 const resultCRC = XXH32_digest(&(dctx->xxh));
|
||||
#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
|
||||
if (readCRC != resultCRC)
|
||||
return err0r(LZ4F_ERROR_contentChecksum_invalid);
|
||||
#else
|
||||
(void)readCRC;
|
||||
(void)resultCRC;
|
||||
#endif
|
||||
nextSrcSizeHint = 0;
|
||||
LZ4F_resetDecompressionContext(dctx);
|
||||
doAnotherStage = 0;
|
||||
|
@ -253,6 +253,15 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx);
|
||||
#define LZ4F_HEADER_SIZE_MIN 7 /* LZ4 Frame header size can vary, depending on selected paramaters */
|
||||
#define LZ4F_HEADER_SIZE_MAX 19
|
||||
|
||||
/* Size in bytes of a block header in little-endian format. Highest bit indicates if block data is uncompressed */
|
||||
#define LZ4F_BLOCK_HEADER_SIZE 4
|
||||
|
||||
/* Size in bytes of a block checksum footer in little-endian format. */
|
||||
#define LZ4F_BLOCK_CHECKSUM_SIZE 4
|
||||
|
||||
/* Size in bytes of the content checksum. */
|
||||
#define LZ4F_CONTENT_CHECKSUM_SIZE 4
|
||||
|
||||
/*! LZ4F_compressBegin() :
|
||||
* will write the frame header into dstBuffer.
|
||||
* dstCapacity must be >= LZ4F_HEADER_SIZE_MAX bytes.
|
||||
|
120
mfbt/lz4/lz4hc.c
120
mfbt/lz4/lz4hc.c
@ -151,6 +151,21 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match,
|
||||
return back;
|
||||
}
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# define LZ4HC_rotl32(x,r) _rotl(x,r)
|
||||
#else
|
||||
# define LZ4HC_rotl32(x,r) ((x << r) | (x >> (32 - r)))
|
||||
#endif
|
||||
|
||||
|
||||
static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern)
|
||||
{
|
||||
size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3;
|
||||
if (bitsToRotate == 0)
|
||||
return pattern;
|
||||
return LZ4HC_rotl32(pattern, (int)bitsToRotate);
|
||||
}
|
||||
|
||||
/* LZ4HC_countPattern() :
|
||||
* pattern32 must be a sample of repetitive pattern of length 1, 2 or 4 (but not 3!) */
|
||||
static unsigned
|
||||
@ -203,6 +218,16 @@ LZ4HC_reverseCountPattern(const BYTE* ip, const BYTE* const iLow, U32 pattern)
|
||||
return (unsigned)(iStart - ip);
|
||||
}
|
||||
|
||||
/* LZ4HC_protectDictEnd() :
|
||||
* Checks if the match is in the last 3 bytes of the dictionary, so reading the
|
||||
* 4 byte MINMATCH would overflow.
|
||||
* @returns true if the match index is okay.
|
||||
*/
|
||||
static int LZ4HC_protectDictEnd(U32 const dictLimit, U32 const matchIndex)
|
||||
{
|
||||
return ((U32)((dictLimit - 1) - matchIndex) >= 3);
|
||||
}
|
||||
|
||||
typedef enum { rep_untested, rep_not, rep_confirmed } repeat_state_e;
|
||||
typedef enum { favorCompressionRatio=0, favorDecompressionSpeed } HCfavor_e;
|
||||
|
||||
@ -228,7 +253,7 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
const U32 dictLimit = hc4->dictLimit;
|
||||
const BYTE* const lowPrefixPtr = base + dictLimit;
|
||||
const U32 ipIndex = (U32)(ip - base);
|
||||
const U32 lowestMatchIndex = (hc4->lowLimit + 64 KB > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
|
||||
const U32 lowestMatchIndex = (hc4->lowLimit + (LZ4_DISTANCE_MAX + 1) > ipIndex) ? hc4->lowLimit : ipIndex - LZ4_DISTANCE_MAX;
|
||||
const BYTE* const dictBase = hc4->dictBase;
|
||||
int const lookBackLength = (int)(ip-iLowLimit);
|
||||
int nbAttempts = maxNbAttempts;
|
||||
@ -287,14 +312,21 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
if (chainSwap && matchLength==longest) { /* better match => select a better chain */
|
||||
assert(lookBackLength==0); /* search forward only */
|
||||
if (matchIndex + (U32)longest <= ipIndex) {
|
||||
int const kTrigger = 4;
|
||||
U32 distanceToNextMatch = 1;
|
||||
int const end = longest - MINMATCH + 1;
|
||||
int step = 1;
|
||||
int accel = 1 << kTrigger;
|
||||
int pos;
|
||||
for (pos = 0; pos <= longest - MINMATCH; pos++) {
|
||||
for (pos = 0; pos < end; pos += step) {
|
||||
U32 const candidateDist = DELTANEXTU16(chainTable, matchIndex + (U32)pos);
|
||||
step = (accel++ >> kTrigger);
|
||||
if (candidateDist > distanceToNextMatch) {
|
||||
distanceToNextMatch = candidateDist;
|
||||
matchChainPos = (U32)pos;
|
||||
} }
|
||||
accel = 1 << kTrigger;
|
||||
}
|
||||
}
|
||||
if (distanceToNextMatch > 1) {
|
||||
if (distanceToNextMatch > matchIndex) break; /* avoid overflow */
|
||||
matchIndex -= distanceToNextMatch;
|
||||
@ -313,34 +345,61 @@ LZ4HC_InsertAndGetWiderMatch (
|
||||
} else {
|
||||
repeat = rep_not;
|
||||
} }
|
||||
if ( (repeat == rep_confirmed)
|
||||
&& (matchCandidateIdx >= dictLimit) ) { /* same segment only */
|
||||
const BYTE* const matchPtr = base + matchCandidateIdx;
|
||||
if ( (repeat == rep_confirmed) && (matchCandidateIdx >= lowestMatchIndex)
|
||||
&& LZ4HC_protectDictEnd(dictLimit, matchCandidateIdx) ) {
|
||||
const int extDict = matchCandidateIdx < dictLimit;
|
||||
const BYTE* const matchPtr = (extDict ? dictBase : base) + matchCandidateIdx;
|
||||
if (LZ4_read32(matchPtr) == pattern) { /* good candidate */
|
||||
size_t const forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iHighLimit, pattern) + sizeof(pattern);
|
||||
const BYTE* const lowestMatchPtr = (lowPrefixPtr + LZ4_DISTANCE_MAX >= ip) ? lowPrefixPtr : ip - LZ4_DISTANCE_MAX;
|
||||
size_t const backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
|
||||
size_t const currentSegmentLength = backLength + forwardPatternLength;
|
||||
|
||||
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
|
||||
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
|
||||
matchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
|
||||
} else {
|
||||
matchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
|
||||
if (lookBackLength==0) { /* no back possible */
|
||||
size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
|
||||
if ((size_t)longest < maxML) {
|
||||
assert(base + matchIndex < ip);
|
||||
if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
|
||||
assert(maxML < 2 GB);
|
||||
longest = (int)maxML;
|
||||
*matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
|
||||
*startpos = ip;
|
||||
const BYTE* const dictStart = dictBase + hc4->lowLimit;
|
||||
const BYTE* const iLimit = extDict ? dictBase + dictLimit : iHighLimit;
|
||||
size_t forwardPatternLength = LZ4HC_countPattern(matchPtr+sizeof(pattern), iLimit, pattern) + sizeof(pattern);
|
||||
if (extDict && matchPtr + forwardPatternLength == iLimit) {
|
||||
U32 const rotatedPattern = LZ4HC_rotatePattern(forwardPatternLength, pattern);
|
||||
forwardPatternLength += LZ4HC_countPattern(lowPrefixPtr, iHighLimit, rotatedPattern);
|
||||
}
|
||||
{ const BYTE* const lowestMatchPtr = extDict ? dictStart : lowPrefixPtr;
|
||||
size_t backLength = LZ4HC_reverseCountPattern(matchPtr, lowestMatchPtr, pattern);
|
||||
size_t currentSegmentLength;
|
||||
if (!extDict && matchPtr - backLength == lowPrefixPtr && hc4->lowLimit < dictLimit) {
|
||||
U32 const rotatedPattern = LZ4HC_rotatePattern((U32)(-(int)backLength), pattern);
|
||||
backLength += LZ4HC_reverseCountPattern(dictBase + dictLimit, dictStart, rotatedPattern);
|
||||
}
|
||||
/* Limit backLength not go further than lowestMatchIndex */
|
||||
backLength = matchCandidateIdx - MAX(matchCandidateIdx - (U32)backLength, lowestMatchIndex);
|
||||
assert(matchCandidateIdx - backLength >= lowestMatchIndex);
|
||||
currentSegmentLength = backLength + forwardPatternLength;
|
||||
/* Adjust to end of pattern if the source pattern fits, otherwise the beginning of the pattern */
|
||||
if ( (currentSegmentLength >= srcPatternLength) /* current pattern segment large enough to contain full srcPatternLength */
|
||||
&& (forwardPatternLength <= srcPatternLength) ) { /* haven't reached this position yet */
|
||||
U32 const newMatchIndex = matchCandidateIdx + (U32)forwardPatternLength - (U32)srcPatternLength; /* best position, full pattern, might be followed by more match */
|
||||
if (LZ4HC_protectDictEnd(dictLimit, newMatchIndex))
|
||||
matchIndex = newMatchIndex;
|
||||
else {
|
||||
/* Can only happen if started in the prefix */
|
||||
assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
|
||||
matchIndex = dictLimit;
|
||||
}
|
||||
{ U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
|
||||
if (distToNextPattern > matchIndex) break; /* avoid overflow */
|
||||
matchIndex -= distToNextPattern;
|
||||
} } }
|
||||
} else {
|
||||
U32 const newMatchIndex = matchCandidateIdx - (U32)backLength; /* farthest position in current segment, will find a match of length currentSegmentLength + maybe some back */
|
||||
if (!LZ4HC_protectDictEnd(dictLimit, newMatchIndex)) {
|
||||
assert(newMatchIndex >= dictLimit - 3 && newMatchIndex < dictLimit && !extDict);
|
||||
matchIndex = dictLimit;
|
||||
} else {
|
||||
matchIndex = newMatchIndex;
|
||||
if (lookBackLength==0) { /* no back possible */
|
||||
size_t const maxML = MIN(currentSegmentLength, srcPatternLength);
|
||||
if ((size_t)longest < maxML) {
|
||||
assert(base + matchIndex < ip);
|
||||
if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break;
|
||||
assert(maxML < 2 GB);
|
||||
longest = (int)maxML;
|
||||
*matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */
|
||||
*startpos = ip;
|
||||
}
|
||||
{ U32 const distToNextPattern = DELTANEXTU16(chainTable, matchIndex);
|
||||
if (distToNextPattern > matchIndex) break; /* avoid overflow */
|
||||
matchIndex -= distToNextPattern;
|
||||
} } } } }
|
||||
continue;
|
||||
} }
|
||||
} } /* PA optimization */
|
||||
@ -1005,6 +1064,9 @@ static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBl
|
||||
ctxPtr->base = newBlock - ctxPtr->dictLimit;
|
||||
ctxPtr->end = newBlock;
|
||||
ctxPtr->nextToUpdate = ctxPtr->dictLimit; /* match referencing will resume from there */
|
||||
|
||||
/* cannot reference an extDict and a dictCtx at the same time */
|
||||
ctxPtr->dictCtx = NULL;
|
||||
}
|
||||
|
||||
static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr,
|
||||
|
@ -336,6 +336,9 @@ LZ4LIB_API void LZ4_resetStreamHC (LZ4_streamHC_t* streamHCPtr, int compressionL
|
||||
#ifndef LZ4_HC_SLO_098092834
|
||||
#define LZ4_HC_SLO_098092834
|
||||
|
||||
#define LZ4_STATIC_LINKING_ONLY /* LZ4LIB_STATIC_API */
|
||||
#include "lz4.h"
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user