diff --git a/.cirrus.yml b/.cirrus.yml
new file mode 100644
index 0000000..0c0e7a7
--- /dev/null
+++ b/.cirrus.yml
@@ -0,0 +1,5 @@
+freebsd_instance:
+ image_family: freebsd-12-1
+
+task:
+ script: pkg install -y gmake && gmake test
diff --git a/.gitattributes b/.gitattributes
index 16880e2..6212bd4 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,17 +1,21 @@
# Set the default behavior
* text eol=lf
+
# Explicitly declare source files
*.c text eol=lf
*.h text eol=lf
+
# Denote files that should not be modified.
*.odt binary
*.png binary
+
# Visual Studio
*.sln text eol=crlf
*.vcxproj* text eol=crlf
*.vcproj* text eol=crlf
*.suo binary
*.rc text eol=crlf
+
# Windows
*.bat text eol=crlf
-*.cmd text eol=crlf
\ No newline at end of file
+*.cmd text eol=crlf
diff --git a/.gitignore b/.gitignore
index 2a59a7d..d7ba96e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,9 @@ _codelite_lz4/
bin/
*.zip
+# analyzers
+infer-out
+
# Mac
.DS_Store
*.dSYM
diff --git a/.travis.yml b/.travis.yml
index bd29630..f201d52 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -10,9 +10,7 @@ matrix:
script:
- make # test library build
- make clean
- - make -C tests test-lz4 MOREFLAGS='-Werror -Wconversion -Wno-sign-conversion' | tee # test scenario where `stdout` is not the console
- - make clean
- - CFLAGS=-m32 make -C tests test-lz4-contentSize
+ - make test MOREFLAGS='-Werror -Wconversion -Wno-sign-conversion' | tee # test scenario where `stdout` is not the console
# Container-based 12.04 LTS Server Edition 64 bit (doesn't support 32-bit includes)
- name: (Precise) benchmark test
@@ -33,9 +31,15 @@ matrix:
script:
- CC=clang MOREFLAGS=-fsanitize=address make -C tests test-frametest test-fuzzer
- - name: Custom LZ4_DISTANCE_MAX
+ - name: Custom LZ4_DISTANCE_MAX ; lz4-wlib (CLI linked to dynamic library); LZ4_USER_MEMORY_FUNCTIONS
script:
- MOREFLAGS=-DLZ4_DISTANCE_MAX=8000 make check
+ - make clean
+ - make -C programs lz4-wlib
+ - make clean
+ - make -C tests fullbench-wmalloc # test LZ4_USER_MEMORY_FUNCTIONS
+ - make clean
+ - CC="c++ -Wno-deprecated" make -C tests fullbench-wmalloc # stricter function signature check
- name: (Precise) g++ and clang CMake test
dist: precise
@@ -47,18 +51,15 @@ matrix:
- make clean travis-install
- make clean clangtest
-
- # 14.04 LTS Server Edition 64 bit
- - name: (Trusty) i386 gcc test
- dist: trusty
+ - name: x32 compatibility test
addons:
apt:
packages:
- - libc6-dev-i386
- gcc-multilib
script:
- make -C tests test MOREFLAGS=-mx32
+ # 14.04 LTS Server Edition 64 bit
# presume clang >= v3.9.0
- name: (Trusty) USan test
dist: trusty
@@ -122,6 +123,21 @@ matrix:
- make platformTest CC=arm-linux-gnueabi-gcc QEMU_SYS=qemu-arm-static
- make platformTest CC=aarch64-linux-gnu-gcc QEMU_SYS=qemu-aarch64-static
+ - name: aarch64 real-hw tests
+ arch: arm64
+ script:
+ - make test
+
+ - name: PPC64LE real-hw tests
+ arch: ppc64le
+ script:
+ - make test
+
+ - name: IBM s390x real-hw tests
+ arch: s390x
+ script:
+ - make test
+
- name: (Xenial) gcc-5 compilation
dist: xenial
install:
diff --git a/Makefile b/Makefile
index f25f951..744005f 100644
--- a/Makefile
+++ b/Makefile
@@ -98,7 +98,7 @@ travis-install:
$(MAKE) -j1 install DESTDIR=~/install_test_dir
cmake:
- @cd contrib/cmake_unofficial; cmake $(CMAKE_PARAMS) CMakeLists.txt; $(MAKE)
+	@cd build/cmake && cmake $(CMAKE_PARAMS) CMakeLists.txt && $(MAKE)
endif
@@ -124,8 +124,8 @@ check:
.PHONY: test
test:
- $(MAKE) -C $(TESTDIR) $@
- $(MAKE) -C $(EXDIR) $@
+ CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" $(MAKE) -C $(TESTDIR) $@
+ CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" $(MAKE) -C $(EXDIR) $@
clangtest: CFLAGS ?= -O3
clangtest: CFLAGS += -Werror -Wconversion -Wno-sign-conversion
@@ -142,15 +142,20 @@ clangtest-native: clean
@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(PRGDIR) native CC=clang
@CFLAGS="-O3 -Werror -Wconversion -Wno-sign-conversion" $(MAKE) -C $(TESTDIR) native CC=clang
+usan: CC = clang
+usan: CFLAGS = -O3 -g -fsanitize=undefined -fno-sanitize-recover=undefined -fsanitize-recover=pointer-overflow
+usan: LDFLAGS = $(CFLAGS)
usan: clean
- CC=clang CFLAGS="-O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
+ CFLAGS="$(CFLAGS)" LDFLAGS="$(LDFLAGS)" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
usan32: clean
CFLAGS="-m32 -O3 -g -fsanitize=undefined" $(MAKE) test FUZZER_TIME="-T30s" NB_LOOPS=-i1
+SCANBUILD ?= scan-build
+SCANBUILD_FLAGS += --status-bugs -v --force-analyze-debug-code
.PHONY: staticAnalyze
staticAnalyze: clean
- CFLAGS=-g scan-build --status-bugs -v $(MAKE) all
+ CPPFLAGS=-DLZ4_DEBUG=1 CFLAGS=-g $(SCANBUILD) $(SCANBUILD_FLAGS) $(MAKE) all V=1 DEBUGLEVEL=1
.PHONY: cppcheck
cppcheck:
diff --git a/NEWS b/NEWS
index 860f15b..401931e 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,39 @@
+v1.9.3
+perf: highly improved speed in kernel space, by @terrelln
+perf: faster speed with Visual Studio, thanks to @wolfpld and @remittor
+perf: improved dictionary compression speed, by @felixhandte
+perf: fixed LZ4_compress_HC_destSize() ratio, detected by @hsiangkao
+perf: reduced stack usage in high compression mode, by @Yanpas
+api : LZ4_decompress_safe_partial() supports unknown compressed size, requested by @jfkthame
+api : improved LZ4F_compressBound() with automatic flushing, by Christopher Harvie
+api : can (de)compress to/from NULL without UBs
+api : fix alignment test on 32-bit systems (state initialization)
+api : fix LZ4_saveDictHC() in corner case scenario, detected by @IgorKorkin
+cli : `-l` legacy format is now compatible with `-m` multiple files, by Filipe Calasans
+cli : benchmark mode supports dictionary, by @rkoradi
+cli : fix --fast with large argument, detected by @picoHz
+build: link to user-defined memory functions with LZ4_USER_MEMORY_FUNCTIONS, suggested by Yuriy Levchenko
+build: contrib/cmake_unofficial/ moved to build/cmake/
+build: visual/* moved to build/
+build: updated meson script, by @neheb
+build: tinycc support, by Anton Kochkov
+install: Haiku support, by Jerome Duval
+doc : updated LZ4 frame format, clarify EndMark
+
+v1.9.2
+fix : out-of-bound read in exceptional circumstances when using decompress_partial(), by @terrelln
+fix : slim opportunity for out-of-bound write with compress_fast() with a large enough input and when providing an output smaller than recommended (< LZ4_compressBound(inputSize)), by @terrelln
+fix : rare data corruption bug with LZ4_compress_destSize(), by @terrelln
+fix : data corruption bug when Streaming with an Attached Dict in HC Mode, by @felixhandte
+perf: enable LZ4_FAST_DEC_LOOP on aarch64/GCC by default, by @prekageo
+perf: improved lz4frame streaming API speed, by @dreambottle
+perf: speed up lz4hc on slow patterns when using external dictionary, by @terrelln
+api: better in-place decompression and compression support
+cli : --list supports multi-frames files, by @gstedman
+cli: --version outputs to stdout
+cli : add option --best as an alias of -12 , by @Low-power
+misc: Integration into oss-fuzz by @cmeister2, expanded list of scenarios by @terrelln
+
v1.9.1
fix : decompression functions were reading a few bytes beyond input size (introduced in v1.9.0, reported by @ppodolsky and @danlark1)
api : fix : lz4frame initializers compatibility with c++, reported by @degski
diff --git a/README.OpenSource b/README.OpenSource
index 532ea31..85cd538 100755
--- a/README.OpenSource
+++ b/README.OpenSource
@@ -3,9 +3,9 @@
"Name": "lz4",
"License": "BSD 2-Clause License",
"License File": "LICENSE",
- "Version Number": "1.9.2",
+ "Version Number": "1.9.3",
"Owner": "chennaidong@huawei.com",
- "Upstream URL": "https://github.com/pierrec/lz4",
+ "Upstream URL": "https://github.com/lz4/lz4",
"Description": "This package provides a streaming interface to LZ4 data streams as well as low level compress and uncompress functions for LZ4 data blocks. The implementation is based on the reference C one."
}
]
diff --git a/README.md b/README.md
index 607fc4e..bdb028c 100644
--- a/README.md
+++ b/README.md
@@ -26,25 +26,13 @@ LZ4 library is provided as open-source software using BSD 2-Clause license.
|Branch |Status |
|------------|---------|
-|master | [![Build Status][travisMasterBadge]][travisLink] [![Build status][AppveyorMasterBadge]][AppveyorLink] [![coverity][coverBadge]][coverlink] |
|dev | [![Build Status][travisDevBadge]][travisLink] [![Build status][AppveyorDevBadge]][AppveyorLink] |
-[travisMasterBadge]: https://travis-ci.org/lz4/lz4.svg?branch=master "Continuous Integration test suite"
[travisDevBadge]: https://travis-ci.org/lz4/lz4.svg?branch=dev "Continuous Integration test suite"
[travisLink]: https://travis-ci.org/lz4/lz4
-[AppveyorMasterBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=master&svg=true "Windows test suite"
[AppveyorDevBadge]: https://ci.appveyor.com/api/projects/status/github/lz4/lz4?branch=dev&svg=true "Windows test suite"
[AppveyorLink]: https://ci.appveyor.com/project/YannCollet/lz4-1lndh
-[coverBadge]: https://scan.coverity.com/projects/4735/badge.svg "Static code analysis of Master branch"
-[coverlink]: https://scan.coverity.com/projects/4735
-> **Branch Policy:**
-> - The "master" branch is considered stable, at all times.
-> - The "dev" branch is the one where all contributions must be merged
- before being promoted to master.
-> + If you plan to propose a patch, please commit into the "dev" branch,
- or its own feature branch.
- Direct commit to "master" are not permitted.
Benchmarks
-------------------------
@@ -95,6 +83,17 @@ It is compatible with parallel builds (`-j#`).
[redirection]: https://www.gnu.org/prep/standards/html_node/Directory-Variables.html
[command redefinition]: https://www.gnu.org/prep/standards/html_node/Utilities-in-Makefiles.html
+### Building LZ4 - Using vcpkg
+
+You can download and install LZ4 using the [vcpkg](https://github.com/Microsoft/vcpkg) dependency manager:
+
+ git clone https://github.com/Microsoft/vcpkg.git
+ cd vcpkg
+ ./bootstrap-vcpkg.sh
+ ./vcpkg integrate install
+ ./vcpkg install lz4
+
+The LZ4 port in vcpkg is kept up to date by Microsoft team members and community contributors. If the version is out of date, please [create an issue or pull request](https://github.com/Microsoft/vcpkg) on the vcpkg repository.
Documentation
-------------------------
diff --git a/appveyor.yml b/appveyor.yml
index 056719a..b4c27ef 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,22 +1,22 @@
version: 1.0.{build}
environment:
matrix:
- - COMPILER: "visual"
- CONFIGURATION: "Debug"
- PLATFORM: "x64"
- - COMPILER: "visual"
- CONFIGURATION: "Debug"
- PLATFORM: "Win32"
- - COMPILER: "visual"
- CONFIGURATION: "Release"
- PLATFORM: "x64"
- - COMPILER: "visual"
- CONFIGURATION: "Release"
- PLATFORM: "Win32"
- COMPILER: "gcc"
PLATFORM: "mingw64"
- COMPILER: "gcc"
PLATFORM: "mingw32"
+ - COMPILER: "visual"
+ CONFIGURATION: "Debug"
+ PLATFORM: "x64"
+ - COMPILER: "visual"
+ CONFIGURATION: "Debug"
+ PLATFORM: "Win32"
+ - COMPILER: "visual"
+ CONFIGURATION: "Release"
+ PLATFORM: "x64"
+ - COMPILER: "visual"
+ CONFIGURATION: "Release"
+ PLATFORM: "Win32"
- COMPILER: "gcc"
PLATFORM: "clang"
@@ -47,10 +47,14 @@ build_script:
make -v &&
echo ----- &&
if not [%PLATFORM%]==[clang] (
- make -C programs lz4 && make -C tests fullbench && make -C lib lib
+ make -C programs lz4 &&
+ make -C tests fullbench &&
+ make -C tests fuzzer &&
+ make -C lib lib V=1
) ELSE (
make -C programs lz4 CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" &&
make -C tests fullbench CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" &&
+ make -C tests fuzzer CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion" &&
make -C lib lib CC=clang MOREFLAGS="--target=x86_64-w64-mingw32 -Werror -Wconversion -Wno-sign-conversion"
)
)
@@ -63,39 +67,39 @@ build_script:
COPY lib\lz4hc.h bin\include\ &&
COPY lib\lz4frame.h bin\include\ &&
COPY lib\liblz4.a bin\static\liblz4_static.lib &&
- COPY lib\dll\liblz4.* bin\dll\ &&
+ COPY lib\dll\* bin\dll\ &&
COPY lib\dll\example\Makefile bin\example\ &&
COPY lib\dll\example\fullbench-dll.* bin\example\ &&
COPY lib\dll\example\README.md bin\ &&
COPY programs\lz4.exe bin\lz4.exe
)
- if [%COMPILER%]==[gcc] if [%PLATFORM%]==[mingw64] (
- 7z.exe a bin\lz4_x64.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
+ 7z.exe a -bb1 bin\lz4_x64.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
appveyor PushArtifact bin\lz4_x64.zip
)
- if [%COMPILER%]==[gcc] if [%PLATFORM%]==[mingw32] (
- 7z.exe a bin\lz4_x86.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
+ 7z.exe a -bb1 bin\lz4_x86.zip NEWS .\bin\lz4.exe .\bin\README.md .\bin\example .\bin\dll .\bin\static .\bin\include &&
appveyor PushArtifact bin\lz4_x86.zip
)
- - if [%COMPILER%]==[gcc] (COPY tests\fullbench.exe programs\)
+ - if [%COMPILER%]==[gcc] (COPY tests\*.exe programs\)
- if [%COMPILER%]==[visual] (
ECHO *** &&
ECHO *** Building Visual Studio 2010 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
- msbuild "visual\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+ msbuild "build\VS2010\lz4.sln" %ADDITIONALPARAM% /m /verbosity:minimal /property:PlatformToolset=v100 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /p:EnableWholeProgramOptimization=true /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
ECHO *** &&
ECHO *** Building Visual Studio 2012 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
- msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+ msbuild "build\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v110 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
ECHO *** &&
ECHO *** Building Visual Studio 2013 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
- msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+ msbuild "build\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v120 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
ECHO *** &&
ECHO *** Building Visual Studio 2015 %PLATFORM%\%CONFIGURATION% &&
ECHO *** &&
- msbuild "visual\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
- COPY visual\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\
+ msbuild "build\VS2010\lz4.sln" /m /verbosity:minimal /property:PlatformToolset=v140 /t:Clean,Build /p:Platform=%PLATFORM% /p:Configuration=%CONFIGURATION% /logger:"C:\Program Files\AppVeyor\BuildAgent\Appveyor.MSBuildLogger.dll" &&
+ COPY build\VS2010\bin\%PLATFORM%_%CONFIGURATION%\*.exe programs\
)
test_script:
@@ -110,7 +114,9 @@ test_script:
lz4 -i1b10 lz4.exe &&
lz4 -i1b15 lz4.exe &&
echo ------- lz4 tested ------- &&
- fullbench.exe -i1 fullbench.exe
+ fullbench.exe -i1 fullbench.exe &&
+ echo trying to launch fuzzer.exe &&
+ fuzzer.exe -v -T30s
)
artifacts:
diff --git a/build/.gitignore b/build/.gitignore
new file mode 100644
index 0000000..69e1111
--- /dev/null
+++ b/build/.gitignore
@@ -0,0 +1,16 @@
+# Visual C++
+.vs/
+*Copy
+*.db
+*.opensdf
+*.sdf
+*.suo
+*.user
+ver*/
+VS2010/bin/
+VS2017/bin/
+ipch
+
+# Fixup for lz4 project directories
+!VS2010/lz4
+!VS2017/lz4
diff --git a/visual/README.md b/build/README.md
similarity index 81%
rename from visual/README.md
rename to build/README.md
index 216971f..d416aeb 100644
--- a/visual/README.md
+++ b/build/README.md
@@ -4,7 +4,9 @@ Projects for various integrated development environments (IDE)
#### Included projects
The following projects are included with the lz4 distribution:
+- `cmake` - CMake project
- `VS2010` - Visual Studio 2010 project (which also works well with Visual Studio 2012, 2013, 2015)
+- `VS2017` - Visual Studio 2017 project
#### How to compile lz4 with Visual Studio
@@ -12,25 +14,25 @@ The following projects are included with the lz4 distribution:
1. Install Visual Studio e.g. VS 2015 Community Edition (it's free).
2. Download the latest version of lz4 from https://github.com/lz4/lz4/releases
3. Decompress ZIP archive.
-4. Go to decompressed directory then to `visual` then `VS2010` and open `lz4.sln`
+4. Go to decompressed directory then to `build` then `VS2010` and open `lz4.sln`
5. Visual Studio will ask about converting VS2010 project to VS2015 and you should agree.
6. Change `Debug` to `Release` and if you have 64-bit Windows change also `Win32` to `x64`.
7. Press F7 on keyboard or select `BUILD` from the menu bar and choose `Build Solution`.
-8. If compilation will be fine a compiled executable will be in `visual\VS2010\bin\x64_Release\lz4.exe`
+8. If compilation succeeds, the compiled executable will be in `build\VS2010\bin\x64_Release\lz4.exe`
#### Projects available within lz4.sln
The Visual Studio solution file `lz4.sln` contains many projects that will be compiled to the
-`visual\VS2010\bin\$(Platform)_$(Configuration)` directory. For example `lz4` set to `x64` and
-`Release` will be compiled to `visual\VS2010\bin\x64_Release\lz4.exe`. The solution file contains the
+`build\VS2010\bin\$(Platform)_$(Configuration)` directory. For example `lz4` set to `x64` and
+`Release` will be compiled to `build\VS2010\bin\x64_Release\lz4.exe`. The solution file contains the
following projects:
- `lz4` : Command Line Utility, supporting gzip-like arguments
- `datagen` : Synthetic and parametrable data generator, for tests
- `frametest` : Test tool that checks lz4frame integrity on target platform
- `fullbench` : Precisely measure speed for each lz4 inner functions
-- `fuzzer` : Test tool, to check lz4 integrity on target platform
+- `fuzzer` : Test tool, to check lz4 integrity on target platform
- `liblz4` : A static LZ4 library compiled to `liblz4_static.lib`
- `liblz4-dll` : A dynamic LZ4 library (DLL) compiled to `liblz4.dll` with the import library `liblz4.lib`
- `fullbench-dll` : The fullbench program compiled with the import library; the executable requires LZ4 DLL
@@ -39,8 +41,8 @@ following projects:
#### Using LZ4 DLL with Microsoft Visual C++ project
The header files `lib\lz4.h`, `lib\lz4hc.h`, `lib\lz4frame.h` and the import library
-`visual\VS2010\bin\$(Platform)_$(Configuration)\liblz4.lib` are required to compile a
-project using Visual C++.
+`build\VS2010\bin\$(Platform)_$(Configuration)\liblz4.lib` are required to
+compile a project using Visual C++.
1. The path to header files should be added to `Additional Include Directories` that can
be found in Project Properties of Visual Studio IDE in the `C/C++` Property Pages on the `General` page.
@@ -50,4 +52,4 @@ project using Visual C++.
then the directory has to be added to `Linker\General\Additional Library Directories`.
The compiled executable will require LZ4 DLL which is available at
-`visual\VS2010\bin\$(Platform)_$(Configuration)\liblz4.dll`.
+`build\VS2010\bin\$(Platform)_$(Configuration)\liblz4.dll`.
diff --git a/visual/VS2010/datagen/datagen.vcxproj b/build/VS2010/datagen/datagen.vcxproj
similarity index 100%
rename from visual/VS2010/datagen/datagen.vcxproj
rename to build/VS2010/datagen/datagen.vcxproj
diff --git a/visual/VS2010/frametest/frametest.vcxproj b/build/VS2010/frametest/frametest.vcxproj
similarity index 100%
rename from visual/VS2010/frametest/frametest.vcxproj
rename to build/VS2010/frametest/frametest.vcxproj
diff --git a/visual/VS2010/fullbench-dll/fullbench-dll.vcxproj b/build/VS2010/fullbench-dll/fullbench-dll.vcxproj
similarity index 100%
rename from visual/VS2010/fullbench-dll/fullbench-dll.vcxproj
rename to build/VS2010/fullbench-dll/fullbench-dll.vcxproj
diff --git a/visual/VS2010/fullbench/fullbench.vcxproj b/build/VS2010/fullbench/fullbench.vcxproj
similarity index 100%
rename from visual/VS2010/fullbench/fullbench.vcxproj
rename to build/VS2010/fullbench/fullbench.vcxproj
diff --git a/visual/VS2010/fuzzer/fuzzer.vcxproj b/build/VS2010/fuzzer/fuzzer.vcxproj
similarity index 100%
rename from visual/VS2010/fuzzer/fuzzer.vcxproj
rename to build/VS2010/fuzzer/fuzzer.vcxproj
diff --git a/visual/VS2010/liblz4-dll/liblz4-dll.rc b/build/VS2010/liblz4-dll/liblz4-dll.rc
similarity index 100%
rename from visual/VS2010/liblz4-dll/liblz4-dll.rc
rename to build/VS2010/liblz4-dll/liblz4-dll.rc
diff --git a/visual/VS2010/liblz4-dll/liblz4-dll.vcxproj b/build/VS2010/liblz4-dll/liblz4-dll.vcxproj
similarity index 100%
rename from visual/VS2010/liblz4-dll/liblz4-dll.vcxproj
rename to build/VS2010/liblz4-dll/liblz4-dll.vcxproj
diff --git a/visual/VS2010/liblz4/liblz4.vcxproj b/build/VS2010/liblz4/liblz4.vcxproj
similarity index 100%
rename from visual/VS2010/liblz4/liblz4.vcxproj
rename to build/VS2010/liblz4/liblz4.vcxproj
diff --git a/visual/VS2010/lz4.sln b/build/VS2010/lz4.sln
similarity index 100%
rename from visual/VS2010/lz4.sln
rename to build/VS2010/lz4.sln
diff --git a/build/VS2010/lz4/lz4.rc b/build/VS2010/lz4/lz4.rc
new file mode 100644
index 0000000..c593edf
--- /dev/null
+++ b/build/VS2010/lz4/lz4.rc
@@ -0,0 +1,51 @@
+// Microsoft Visual C++ generated resource script.
+//
+
+#include "lz4.h" /* LZ4_VERSION_STRING */
+#define APSTUDIO_READONLY_SYMBOLS
+#include "verrsrc.h"
+#undef APSTUDIO_READONLY_SYMBOLS
+
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+LANGUAGE 9, 1
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION LZ4_VERSION_MAJOR,LZ4_VERSION_MINOR,LZ4_VERSION_RELEASE,0
+ PRODUCTVERSION LZ4_VERSION_MAJOR,LZ4_VERSION_MINOR,LZ4_VERSION_RELEASE,0
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+#ifdef _DEBUG
+ FILEFLAGS VS_FF_DEBUG
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS VOS_NT_WINDOWS32
+ FILETYPE VFT_APP /* this resource describes lz4.exe, an application, not a DLL */
+ FILESUBTYPE VFT2_UNKNOWN
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904B0"
+ BEGIN
+ VALUE "CompanyName", "Yann Collet"
+ VALUE "FileDescription", "Extremely fast compression"
+ VALUE "FileVersion", LZ4_VERSION_STRING
+ VALUE "InternalName", "lz4.exe"
+ VALUE "LegalCopyright", "Copyright (C) 2013-2016, Yann Collet"
+ VALUE "OriginalFilename", "lz4.exe"
+ VALUE "ProductName", "LZ4"
+ VALUE "ProductVersion", LZ4_VERSION_STRING
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x0409, 1200
+ END
+END
+
+#endif
diff --git a/build/VS2010/lz4/lz4.vcxproj b/build/VS2010/lz4/lz4.vcxproj
new file mode 100644
index 0000000..de7a714
--- /dev/null
+++ b/build/VS2010/lz4/lz4.vcxproj
@@ -0,0 +1,189 @@
+
+ ";
+ for (l=0; l` header for the first line
+- comments of type `/*=` and `/**=` mean: use a `
` header and show also all functions until first empty line
+- comments of type `/*X` where `X` is different from above-mentioned are ignored
+
+Moreover:
+- `LZ4LIB_API` is removed to improve readability
+- `typedef` are detected and included even if uncommented
+- comments of type `/**<` and `/*!<` are detected and only function declaration is highlighted (bold)
+
+
+#### Usage
+
+The program requires 3 parameters:
+```
+gen_manual [lz4_version] [input_file] [output_html]
+```
+
+To compile the program and generate the lz4 manual, we used:
+```
+make
+./gen_manual.exe 1.7.3 ../../lib/lz4.h lz4_manual.html
+```
diff --git a/contrib/gen_manual/gen-lz4-manual.sh b/contrib/gen_manual/gen-lz4-manual.sh
new file mode 100644
index 0000000..73a7214
--- /dev/null
+++ b/contrib/gen_manual/gen-lz4-manual.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Extract the three numeric version components from the LZ4_VERSION_* defines in lib/lz4.h.
+LIBVER_MAJOR_SCRIPT=$(sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/lz4.h)
+LIBVER_MINOR_SCRIPT=$(sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/lz4.h)
+LIBVER_PATCH_SCRIPT=$(sed -n '/define LZ4_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < ../../lib/lz4.h)
+LIBVER_SCRIPT=${LIBVER_MAJOR_SCRIPT}.${LIBVER_MINOR_SCRIPT}.${LIBVER_PATCH_SCRIPT}
+# Print the assembled version, then run gen_manual on lz4.h and lz4frame.h.
+echo LZ4_VERSION=${LIBVER_SCRIPT}
+./gen_manual "lz4 $LIBVER_SCRIPT" ../../lib/lz4.h ./lz4_manual.html
+./gen_manual "lz4frame $LIBVER_SCRIPT" ../../lib/lz4frame.h ./lz4frame_manual.html
diff --git a/contrib/gen_manual/gen_manual.cpp b/contrib/gen_manual/gen_manual.cpp
new file mode 100644
index 0000000..d5fe702
--- /dev/null
+++ b/contrib/gen_manual/gen_manual.cpp
@@ -0,0 +1,248 @@
+/*
+Copyright (c) 2016-present, Przemyslaw Skibinski
+All rights reserved.
+
+BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- LZ4 homepage : http://www.lz4.org
+- LZ4 source repository : https://github.com/lz4/lz4
+*/
+
+#include
";
+ for (l=0; l
" << endl;
+ continue;
+ }
+
+ /* comments of type / * * < and / * ! < are detected, and only function declaration is highlighted (bold) */
+ if ((line.find("/**<")!=string::npos || line.find("/*!<")!=string::npos)
+ && line.find("*/")!=string::npos) {
+ sout << "";
+ print_line(sout, line);
+ sout << "
" << endl;
+ continue;
+ }
+
+ spos = line.find("/**=");
+ if (spos==string::npos) {
+ spos = line.find("/*!");
+ if (spos==string::npos)
+ spos = line.find("/**");
+ if (spos==string::npos)
+ spos = line.find("/*-");
+ if (spos==string::npos)
+ spos = line.find("/*=");
+ if (spos==string::npos)
+ continue;
+ exclam = line[spos+2];
+ }
+ else exclam = '=';
+
+ comments = get_lines(input, linenum, "*/");
+ if (!comments.empty()) comments[0] = line.substr(spos+3);
+ if (!comments.empty())
+ comments[comments.size()-1] = comments[comments.size()-1].substr(0, comments[comments.size()-1].find("*/"));
+ for (l=0; l
" << endl << endl;
+ } else if (exclam == '=') { /* comments of type / * = and / * * = mean: use a header and show also all functions until first empty line */
+ trim(comments[0], " ");
+ sout << "
" << comments[0] << "
";
+ for (l=1; l
" << endl << endl;
+ }
+ }
+
+ ostream << "\n\n\n";
+ lines = get_lines(input, ++linenum, "");
+ for (l=0; l
" << endl;
+ } else { /* comments of type / * * and / * - mean: this is a comment; use a header for the first line */
+ if (comments.empty()) continue;
+
+ trim(comments[0], " ");
+ sout << "
" << comments[0] << "
";
+ chapters.push_back(comments[0]);
+ chapter++;
+
+ for (l=1; l
" << endl << endl;
+ else
+ sout << "" << version << "
\n";
+
+ ostream << "
\nContents
\n\n";
+ for (size_t i=0; i
\n
\n";
+
+ ostream << sout.str();
+ ostream << "" << endl << "" << endl;
+
+ return 0;
+}
diff --git a/contrib/meson/README.md b/contrib/meson/README.md
new file mode 100644
index 0000000..a44850a
--- /dev/null
+++ b/contrib/meson/README.md
@@ -0,0 +1,34 @@
+Meson build system for lz4
+==========================
+
+Meson is a build system designed to optimize programmer productivity.
+It aims to do this by providing simple, out-of-the-box support for
+modern software development tools and practices, such as unit tests,
+coverage reports, Valgrind, CCache and the like.
+
+This Meson build system is provided with no guarantee.
+
+## How to build
+
+`cd` to this meson directory (`contrib/meson`)
+
+```sh
+meson setup --buildtype=release -Ddefault_library=shared -Dbin_programs=true builddir
+cd builddir
+ninja # to build
+ninja install # to install
+```
+
+You might want to install it in a staging directory:
+
+```sh
+DESTDIR=./staging ninja install
+```
+
+To configure build options, use:
+
+```sh
+meson configure
+```
+
+See [man meson(1)](https://manpages.debian.org/testing/meson/meson.1.en.html).
diff --git a/contrib/meson/meson.build b/contrib/meson/meson.build
new file mode 100644
index 0000000..d1e97d9
--- /dev/null
+++ b/contrib/meson/meson.build
@@ -0,0 +1,21 @@
+# #############################################################################
+# Copyright (c) 2018-present lzutao 1.9.2 Manual
+1.9.3 Manual
Contents
@@ -16,7 +16,7 @@
@@ -117,7 +117,8 @@
The larger the acceleration value, the faster the algorithm, but also the lesser the compression.
It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed.
An acceleration value of "1" is the same as regular LZ4_compress_default()
- Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+ Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c).
+ Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c).
int LZ4_sizeofState(void);
@@ -140,31 +141,53 @@ int LZ4_compress_fast_extState (void* state, const char* src, char* dst, int src
New value is necessarily <= input value.
@return : Nb bytes written into 'dst' (necessarily <= targetDestSize)
or 0 if compression fails.
+
+ Note : from v1.8.2 to v1.9.1, this function had a bug (fixed in v1.9.2+):
+ the produced compressed content could, in specific circumstances,
+ require to be decompressed into a destination buffer larger
+ by at least 1 byte than the content to decompress.
+ If an application uses `LZ4_compress_destSize()`,
+ it's highly recommended to update liblz4 to v1.9.2 or better.
+ If this can't be done or ensured,
+ the receiving decompression function should provide
+ a dstCapacity which is > decompressedSize, by at least 1 byte.
+ See https://github.com/lz4/lz4/issues/859 for details
+
int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity);Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', into destination buffer 'dst' of size 'dstCapacity'. Up to 'targetOutputSize' bytes will be decoded. - The function stops decoding on reaching this objective, - which can boost performance when only the beginning of a block is required. + The function stops decoding on reaching this objective. + This can be useful to boost performance + whenever only the beginning of a block is required. - @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) If source stream is detected malformed, function returns a negative result. - Note : @return can be < targetOutputSize, if compressed block contains less data. + Note 1 : @return can be < targetOutputSize, if compressed block contains less data. - Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, - and expects targetOutputSize <= dstCapacity. - It effectively stops decoding on reaching targetOutputSize, + Note 2 : targetOutputSize must be <= dstCapacity + + Note 3 : this function effectively stops decoding on reaching targetOutputSize, so dstCapacity is kind of redundant. - This is because in a previous version of this function, - decoding operation would not "break" a sequence in the middle. - As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + This is because in older versions of this function, + decoding operation would still write complete sequences. + Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, it could write more bytes, though only up to dstCapacity. Some "margin" used to be required for this operation to work properly. - This is no longer necessary. 
- The function nonetheless keeps its signature, in an effort to not break API. + Thankfully, this is no longer necessary. + The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + + Note 4 : If srcSize is the exact size of the block, + then targetOutputSize can be any value, + including larger than the block's decompressed size. + The function will, at most, generate block's decompressed size. + + Note 5 : If srcSize is _larger_ than block's compressed size, + then targetOutputSize **MUST** be <= block's decompressed size. + Otherwise, *silent corruption will occur*.
#define LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCompressedSize) ((maxCompressedSize) + LZ4_COMPRESS_INPLACE_MARGIN) /**< maxCompressedSize is generally LZ4_COMPRESSBOUND(inputSize), but can be set to any lower value, with the risk that compression can fail (return code 0(zero)) */
+Private Definitions
Do not use these definitions directly. They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. - Accessing members will expose code to API and/or ABI break in future versions of the library. + Accessing members will expose user code to API and/or ABI break in future versions of the library.typedef struct { - const uint8_t* externalDict; + const LZ4_byte* externalDict; size_t extDictSize; - const uint8_t* prefixEnd; + const LZ4_byte* prefixEnd; size_t prefixSize; } LZ4_streamDecode_t_internal;
-typedef struct { - const unsigned char* externalDict; - const unsigned char* prefixEnd; - size_t extDictSize; - size_t prefixSize; -} LZ4_streamDecode_t_internal; -
-#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) /*AS-400*/ ) -#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long)) +#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */ +#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*)) union LZ4_stream_u { - unsigned long long table[LZ4_STREAMSIZE_U64]; + void* table[LZ4_STREAMSIZE_VOIDP]; LZ4_stream_t_internal internal_donotuse; -} ; /* previously typedef'd to LZ4_stream_t */ -/* previously typedef'd to LZ4_stream_t */ +information structure to track an LZ4 stream. +};
Do not use below internal definitions directly ! + Declare or allocate an LZ4_stream_t instead. LZ4_stream_t can also be created using LZ4_createStream(), which is recommended. The structure definition can be convenient for static allocation (on stack, or as part of larger structure). Init this structure with LZ4_initStream() before first use. note : only use this definition in association with static linking ! - this definition is not API/ABI safe, and may change in a future version. + this definition is not API/ABI safe, and may change in future versions.
@@ -494,18 +511,17 @@ union LZ4_streamDecode_u {#ifdef LZ4_DISABLE_DEPRECATE_WARNINGS # define LZ4_DEPRECATED(message) /* disable deprecation warnings */ #else -# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ # define LZ4_DEPRECATED(message) [[deprecated(message)]] -# elif (LZ4_GCC_VERSION >= 405) || defined(__clang__) -# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) -# elif (LZ4_GCC_VERSION >= 301) -# define LZ4_DEPRECATED(message) __attribute__((deprecated)) # elif defined(_MSC_VER) # define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) # else -# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler") -# define LZ4_DEPRECATED(message) +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ # endif #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */@@ -520,18 +536,39 @@ union LZ4_streamDecode_u {
-These functions used to be faster than LZ4_decompress_safe(), - but it has changed, and they are now slower than LZ4_decompress_safe(). +
LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); +LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_limitedOutput_withState (void* state, const char* source, char* dest, int inputSize, int maxOutputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); +LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); +
+ +LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); +LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); +
+ +LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); +LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); +
+ +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") +int LZ4_decompress_fast (const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") +int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* src, char* dst, int originalSize); +LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_usingDict() instead") +int LZ4_decompress_fast_usingDict (const char* src, char* dst, int originalSize, const char* dictStart, int dictSize); +These functions used to be faster than LZ4_decompress_safe(), + but this is no longer the case. They are now slower. This is because LZ4_decompress_fast() doesn't know the input size, - and therefore must progress more cautiously in the input buffer to not read beyond the end of block. + and therefore must progress more cautiously into the input buffer to not read beyond the end of block. On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. The last remaining LZ4_decompress_fast() specificity is that it can decompress a block without knowing its compressed size. - Such functionality could be achieved in a more secure manner, - by also providing the maximum size of input buffer, - but it would require new prototypes, and adaptation of the implementation to this new use case. + Such functionality can be achieved in a more secure manner + by employing LZ4_decompress_safe_partial(). Parameters: originalSize : is the uncompressed size to regenerate. diff --git a/doc/lz4frame_manual.html b/doc/lz4frame_manual.html index 72f27c8..2758306 100644 --- a/doc/lz4frame_manual.html +++ b/doc/lz4frame_manual.html @@ -1,10 +1,10 @@
-1.9.2 Manual +1.9.3 Manual -1.9.2 Manual
+1.9.3 Manual
Contents
@@ -167,7 +167,7 @@ LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctx); @return is always the same for a srcSize and prefsPtr. prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario. tech details : - @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes. + @return if automatic flushing is not enabled, includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes. It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd(). @return doesn't include frame header, as it was already generated by LZ4F_compressBegin(). @@ -299,8 +299,10 @@ LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx); void* dstBuffer, size_t* dstSizePtr, const void* srcBuffer, size_t* srcSizePtr, const LZ4F_decompressOptions_t* dOptPtr); -
Call this function repetitively to regenerate compressed data from `srcBuffer`. - The function will read up to *srcSizePtr bytes from srcBuffer, +
Call this function repetitively to regenerate data compressed in `srcBuffer`. + + The function requires a valid dctx state. + It will read up to *srcSizePtr bytes from srcBuffer, and decompress data into dstBuffer, of capacity *dstSizePtr. The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value). diff --git a/examples/.gitignore b/examples/.gitignore new file mode 100644 index 0000000..5abeef6 --- /dev/null +++ b/examples/.gitignore @@ -0,0 +1,10 @@ +/Makefile.lz4* +/printVersion +/doubleBuffer +/dictionaryRandomAccess +/ringBuffer +/ringBufferHC +/lineCompress +/frameCompress +/simpleBuffer +/*.exe diff --git a/examples/COPYING b/examples/COPYING new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/examples/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + +
+ Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. 
If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/examples/HCStreaming_ringBuffer.c b/examples/HCStreaming_ringBuffer.c new file mode 100644 index 0000000..bc8391e --- /dev/null +++ b/examples/HCStreaming_ringBuffer.c @@ -0,0 +1,232 @@ +// LZ4 HC streaming API example : ring buffer +// Based on a previous example by Takayuki Matsuoka + + +/************************************** + * Compiler Options + **************************************/ +#if defined(_MSC_VER) && (_MSC_VER <= 1800) /* Visual Studio <= 2013 */ +# define _CRT_SECURE_NO_WARNINGS +# define snprintf sprintf_s +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wmissing-braces" /* GCC bug 53119 : doesn't accept { 0 } as initializer (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119) */ +#endif + + +/************************************** + * Includes + **************************************/ +#include "lz4hc.h" +#include "lz4.h" + +#include +#include +#include +#include +#include + +enum { + MESSAGE_MAX_BYTES = 1024, + RING_BUFFER_BYTES = 1024 * 8 + MESSAGE_MAX_BYTES, + DEC_BUFFER_BYTES = RING_BUFFER_BYTES + MESSAGE_MAX_BYTES // Intentionally larger to test unsynchronized ring buffers +}; + + +size_t write_int32(FILE* fp, int32_t i) { + return fwrite(&i, sizeof(i), 1, fp); +} + +size_t write_bin(FILE* fp, const void* array, int arrayBytes) { + assert(arrayBytes >= 0); + return fwrite(array, 1, (size_t)arrayBytes, fp); +} + +size_t read_int32(FILE* fp, int32_t* i) { + return fread(i, sizeof(*i), 1, fp); +} + +size_t read_bin(FILE* fp, void* array, int arrayBytes) { + assert(arrayBytes >= 0); + return fread(array, 1, (size_t)arrayBytes, fp); +} + + +void test_compress(FILE* outFp, FILE* inpFp) +{ + LZ4_streamHC_t lz4Stream_body = { 0 }; + 
LZ4_streamHC_t* lz4Stream = &lz4Stream_body; + + static char inpBuf[RING_BUFFER_BYTES]; + int inpOffset = 0; + + for(;;) { + // Read random length ([1,MESSAGE_MAX_BYTES]) data to the ring buffer. + char* const inpPtr = &inpBuf[inpOffset]; + const int randomLength = (rand() % MESSAGE_MAX_BYTES) + 1; + const int inpBytes = (int) read_bin(inpFp, inpPtr, randomLength); + if (0 == inpBytes) break; + +#define CMPBUFSIZE (LZ4_COMPRESSBOUND(MESSAGE_MAX_BYTES)) + { char cmpBuf[CMPBUFSIZE]; + const int cmpBytes = LZ4_compress_HC_continue(lz4Stream, inpPtr, cmpBuf, inpBytes, CMPBUFSIZE); + + if(cmpBytes <= 0) break; + write_int32(outFp, cmpBytes); + write_bin(outFp, cmpBuf, cmpBytes); + + inpOffset += inpBytes; + + // Wraparound the ringbuffer offset + if(inpOffset >= RING_BUFFER_BYTES - MESSAGE_MAX_BYTES) + inpOffset = 0; + } + } + + write_int32(outFp, 0); +} + + +void test_decompress(FILE* outFp, FILE* inpFp) +{ + static char decBuf[DEC_BUFFER_BYTES]; + int decOffset = 0; + LZ4_streamDecode_t lz4StreamDecode_body = { 0 }; + LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body; + + for(;;) { + int cmpBytes = 0; + char cmpBuf[CMPBUFSIZE]; + + { const size_t r0 = read_int32(inpFp, &cmpBytes); + size_t r1; + if(r0 != 1 || cmpBytes <= 0) + break; + + r1 = read_bin(inpFp, cmpBuf, cmpBytes); + if(r1 != (size_t) cmpBytes) + break; + } + + { char* const decPtr = &decBuf[decOffset]; + const int decBytes = LZ4_decompress_safe_continue( + lz4StreamDecode, cmpBuf, decPtr, cmpBytes, MESSAGE_MAX_BYTES); + if(decBytes <= 0) + break; + + decOffset += decBytes; + write_bin(outFp, decPtr, decBytes); + + // Wraparound the ringbuffer offset + if(decOffset >= DEC_BUFFER_BYTES - MESSAGE_MAX_BYTES) + decOffset = 0; + } + } +} + + +// Compare 2 files content +// return 0 if identical +// return ByteNb>0 if different +size_t compare(FILE* f0, FILE* f1) +{ + size_t result = 1; + + for (;;) { + char b0[65536]; + char b1[65536]; + const size_t r0 = fread(b0, 1, sizeof(b0), f0); + const size_t r1 
= fread(b1, 1, sizeof(b1), f1); + + if ((r0==0) && (r1==0)) return 0; // success + + if (r0 != r1) { + size_t smallest = r0; + if (r1 +#include +#include +#include + +enum { + BLOCK_BYTES = 1024 * 8, +// BLOCK_BYTES = 1024 * 64, +}; + + +size_t write_int(FILE* fp, int i) { + return fwrite(&i, sizeof(i), 1, fp); +} + +size_t write_bin(FILE* fp, const void* array, size_t arrayBytes) { + return fwrite(array, 1, arrayBytes, fp); +} + +size_t read_int(FILE* fp, int* i) { + return fread(i, sizeof(*i), 1, fp); +} + +size_t read_bin(FILE* fp, void* array, size_t arrayBytes) { + return fread(array, 1, arrayBytes, fp); +} + + +void test_compress(FILE* outFp, FILE* inpFp) +{ + LZ4_stream_t lz4Stream_body; + LZ4_stream_t* lz4Stream = &lz4Stream_body; + + char inpBuf[2][BLOCK_BYTES]; + int inpBufIndex = 0; + + LZ4_initStream(lz4Stream, sizeof (*lz4Stream)); + + for(;;) { + char* const inpPtr = inpBuf[inpBufIndex]; + const int inpBytes = (int) read_bin(inpFp, inpPtr, BLOCK_BYTES); + if(0 == inpBytes) { + break; + } + + { + char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)]; + const int cmpBytes = LZ4_compress_fast_continue( + lz4Stream, inpPtr, cmpBuf, inpBytes, sizeof(cmpBuf), 1); + if(cmpBytes <= 0) { + break; + } + write_int(outFp, cmpBytes); + write_bin(outFp, cmpBuf, (size_t) cmpBytes); + } + + inpBufIndex = (inpBufIndex + 1) % 2; + } + + write_int(outFp, 0); +} + + +void test_decompress(FILE* outFp, FILE* inpFp) +{ + LZ4_streamDecode_t lz4StreamDecode_body; + LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body; + + char decBuf[2][BLOCK_BYTES]; + int decBufIndex = 0; + + LZ4_setStreamDecode(lz4StreamDecode, NULL, 0); + + for(;;) { + char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)]; + int cmpBytes = 0; + + { + const size_t readCount0 = read_int(inpFp, &cmpBytes); + if(readCount0 != 1 || cmpBytes <= 0) { + break; + } + + const size_t readCount1 = read_bin(inpFp, cmpBuf, (size_t) cmpBytes); + if(readCount1 != (size_t) cmpBytes) { + break; + } + } + + { + char* const decPtr = 
decBuf[decBufIndex]; + const int decBytes = LZ4_decompress_safe_continue( + lz4StreamDecode, cmpBuf, decPtr, cmpBytes, BLOCK_BYTES); + if(decBytes <= 0) { + break; + } + write_bin(outFp, decPtr, (size_t) decBytes); + } + + decBufIndex = (decBufIndex + 1) % 2; + } +} + + +int compare(FILE* fp0, FILE* fp1) +{ + int result = 0; + + while(0 == result) { + char b0[65536]; + char b1[65536]; + const size_t r0 = read_bin(fp0, b0, sizeof(b0)); + const size_t r1 = read_bin(fp1, b1, sizeof(b1)); + + result = (int) r0 - (int) r1; + + if(0 == r0 || 0 == r1) { + break; + } + if(0 == result) { + result = memcmp(b0, b1, r0); + } + } + + return result; +} + + +int main(int argc, char* argv[]) +{ + char inpFilename[256] = { 0 }; + char lz4Filename[256] = { 0 }; + char decFilename[256] = { 0 }; + + if(argc < 2) { + printf("Please specify input filename\n"); + return 0; + } + + snprintf(inpFilename, 256, "%s", argv[1]); + snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], BLOCK_BYTES); + snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], BLOCK_BYTES); + + printf("inp = [%s]\n", inpFilename); + printf("lz4 = [%s]\n", lz4Filename); + printf("dec = [%s]\n", decFilename); + + // compress + { + FILE* inpFp = fopen(inpFilename, "rb"); + FILE* outFp = fopen(lz4Filename, "wb"); + + printf("compress : %s -> %s\n", inpFilename, lz4Filename); + test_compress(outFp, inpFp); + printf("compress : done\n"); + + fclose(outFp); + fclose(inpFp); + } + + // decompress + { + FILE* inpFp = fopen(lz4Filename, "rb"); + FILE* outFp = fopen(decFilename, "wb"); + + printf("decompress : %s -> %s\n", lz4Filename, decFilename); + test_decompress(outFp, inpFp); + printf("decompress : done\n"); + + fclose(outFp); + fclose(inpFp); + } + + // verify + { + FILE* inpFp = fopen(inpFilename, "rb"); + FILE* decFp = fopen(decFilename, "rb"); + + printf("verify : %s <-> %s\n", inpFilename, decFilename); + const int cmp = compare(inpFp, decFp); + if(0 == cmp) { + printf("verify : OK\n"); + } else { + printf("verify : 
NG\n"); + } + + fclose(decFp); + fclose(inpFp); + } + + return 0; +} diff --git a/examples/blockStreaming_doubleBuffer.md b/examples/blockStreaming_doubleBuffer.md new file mode 100644 index 0000000..38dc2e8 --- /dev/null +++ b/examples/blockStreaming_doubleBuffer.md @@ -0,0 +1,100 @@ +# LZ4 Streaming API Example : Double Buffer +by *Takayuki Matsuoka* + +`blockStreaming_doubleBuffer.c` is LZ4 Streaming API example which implements double buffer (de)compression. + +Please note : + + - Firstly, read "LZ4 Streaming API Basics". + - This is relatively advanced application example. + - Output file is not compatible with lz4frame and platform dependent. + + +## What's the point of this example ? + + - Handle huge file in small amount of memory + - Always better compression ratio than Block API + - Uniform block size + + +## How the compression works + +First of all, allocate "Double Buffer" for input and LZ4 compressed data buffer for output. +Double buffer has two pages, "first" page (Page#1) and "second" page (Page#2). + +``` + Double Buffer + + Page#1 Page#2 + +---------+---------+ + | Block#1 | | + +----+----+---------+ + | + v + {Out#1} + + + Prefix Dependency + +---------+ + | | + v | + +---------+----+----+ + | Block#1 | Block#2 | + +---------+----+----+ + | + v + {Out#2} + + + External Dictionary Mode + +---------+ + | | + | v + +----+----+---------+ + | Block#3 | Block#2 | + +----+----+---------+ + | + v + {Out#3} + + + Prefix Dependency + +---------+ + | | + v | + +---------+----+----+ + | Block#3 | Block#4 | + +---------+----+----+ + | + v + {Out#4} +``` + +Next, read first block to double buffer's first page. And compress it by `LZ4_compress_continue()`. +For the first time, LZ4 doesn't know any previous dependencies, +so it just compress the line without dependencies and generates compressed block {Out#1} to LZ4 compressed data buffer. +After that, write {Out#1} to the file. + +Next, read second block to double buffer's second page. And compress it. 
+This time, LZ4 can use dependency to Block#1 to improve compression ratio. +This dependency is called "Prefix mode". + +Next, read third block to double buffer's *first* page, and compress it. +Also this time, LZ4 can use dependency to Block#2. +This dependency is called "External Dictionary mode". + +Continue this procedure to the end of the file. + + +## How the decompression works + +Decompression performs the reverse operation. + + - Read first compressed block. + - Decompress it to the first page and write that page to the file. + - Read second compressed block. + - Decompress it to the second page and write that page to the file. + - Read third compressed block. + - Decompress it to the *first* page and write that page to the file. + +Continue this procedure to the end of the compressed file. diff --git a/examples/blockStreaming_lineByLine.c b/examples/blockStreaming_lineByLine.c new file mode 100644 index 0000000..19c3345 --- /dev/null +++ b/examples/blockStreaming_lineByLine.c @@ -0,0 +1,211 @@ +// LZ4 streaming API example : line-by-line logfile compression +// by Takayuki Matsuoka + + +#if defined(_MSC_VER) && (_MSC_VER <= 1800) /* Visual Studio <= 2013 */ +# define _CRT_SECURE_NO_WARNINGS +# define snprintf sprintf_s +#endif +#include "lz4.h" + +#include +#include +#include +#include + +static size_t write_uint16(FILE* fp, uint16_t i) +{ + return fwrite(&i, sizeof(i), 1, fp); +} + +static size_t write_bin(FILE* fp, const void* array, int arrayBytes) +{ + return fwrite(array, 1, arrayBytes, fp); +} + +static size_t read_uint16(FILE* fp, uint16_t* i) +{ + return fread(i, sizeof(*i), 1, fp); +} + +static size_t read_bin(FILE* fp, void* array, int arrayBytes) +{ + return fread(array, 1, arrayBytes, fp); +} + + +static void test_compress( + FILE* outFp, + FILE* inpFp, + size_t messageMaxBytes, + size_t ringBufferBytes) +{ + LZ4_stream_t* const lz4Stream = LZ4_createStream(); + const size_t cmpBufBytes = LZ4_COMPRESSBOUND(messageMaxBytes); + char* const cmpBuf = 
(char*) malloc(cmpBufBytes); + char* const inpBuf = (char*) malloc(ringBufferBytes); + int inpOffset = 0; + + for ( ; ; ) + { + char* const inpPtr = &inpBuf[inpOffset]; + +#if 0 + // Read random length data to the ring buffer. + const int randomLength = (rand() % messageMaxBytes) + 1; + const int inpBytes = (int) read_bin(inpFp, inpPtr, randomLength); + if (0 == inpBytes) break; +#else + // Read line to the ring buffer. + int inpBytes = 0; + if (!fgets(inpPtr, (int) messageMaxBytes, inpFp)) + break; + inpBytes = (int) strlen(inpPtr); +#endif + + { + const int cmpBytes = LZ4_compress_fast_continue( + lz4Stream, inpPtr, cmpBuf, inpBytes, cmpBufBytes, 1); + if (cmpBytes <= 0) break; + write_uint16(outFp, (uint16_t) cmpBytes); + write_bin(outFp, cmpBuf, cmpBytes); + + // Add and wraparound the ringbuffer offset + inpOffset += inpBytes; + if ((size_t)inpOffset >= ringBufferBytes - messageMaxBytes) inpOffset = 0; + } + } + write_uint16(outFp, 0); + + free(inpBuf); + free(cmpBuf); + LZ4_freeStream(lz4Stream); +} + + +static void test_decompress( + FILE* outFp, + FILE* inpFp, + size_t messageMaxBytes, + size_t ringBufferBytes) +{ + LZ4_streamDecode_t* const lz4StreamDecode = LZ4_createStreamDecode(); + char* const cmpBuf = (char*) malloc(LZ4_COMPRESSBOUND(messageMaxBytes)); + char* const decBuf = (char*) malloc(ringBufferBytes); + int decOffset = 0; + + for ( ; ; ) + { + uint16_t cmpBytes = 0; + + if (read_uint16(inpFp, &cmpBytes) != 1) break; + if (cmpBytes == 0) break; + if (read_bin(inpFp, cmpBuf, cmpBytes) != cmpBytes) break; + + { + char* const decPtr = &decBuf[decOffset]; + const int decBytes = LZ4_decompress_safe_continue( + lz4StreamDecode, cmpBuf, decPtr, cmpBytes, (int) messageMaxBytes); + if (decBytes <= 0) break; + write_bin(outFp, decPtr, decBytes); + + // Add and wraparound the ringbuffer offset + decOffset += decBytes; + if ((size_t)decOffset >= ringBufferBytes - messageMaxBytes) decOffset = 0; + } + } + + free(decBuf); + free(cmpBuf); + 
LZ4_freeStreamDecode(lz4StreamDecode); +} + + +static int compare(FILE* f0, FILE* f1) +{ + int result = 0; + const size_t tempBufferBytes = 65536; + char* const b0 = (char*) malloc(tempBufferBytes); + char* const b1 = (char*) malloc(tempBufferBytes); + + while(0 == result) + { + const size_t r0 = fread(b0, 1, tempBufferBytes, f0); + const size_t r1 = fread(b1, 1, tempBufferBytes, f1); + + result = (int) r0 - (int) r1; + + if (0 == r0 || 0 == r1) break; + if (0 == result) result = memcmp(b0, b1, r0); + } + + free(b1); + free(b0); + return result; +} + + +int main(int argc, char* argv[]) +{ + enum { + MESSAGE_MAX_BYTES = 1024, + RING_BUFFER_BYTES = 1024 * 256 + MESSAGE_MAX_BYTES, + }; + + char inpFilename[256] = { 0 }; + char lz4Filename[256] = { 0 }; + char decFilename[256] = { 0 }; + + if (argc < 2) + { + printf("Please specify input filename\n"); + return 0; + } + + snprintf(inpFilename, 256, "%s", argv[1]); + snprintf(lz4Filename, 256, "%s.lz4s", argv[1]); + snprintf(decFilename, 256, "%s.lz4s.dec", argv[1]); + + printf("inp = [%s]\n", inpFilename); + printf("lz4 = [%s]\n", lz4Filename); + printf("dec = [%s]\n", decFilename); + + // compress + { + FILE* inpFp = fopen(inpFilename, "rb"); + FILE* outFp = fopen(lz4Filename, "wb"); + + test_compress(outFp, inpFp, MESSAGE_MAX_BYTES, RING_BUFFER_BYTES); + + fclose(outFp); + fclose(inpFp); + } + + // decompress + { + FILE* inpFp = fopen(lz4Filename, "rb"); + FILE* outFp = fopen(decFilename, "wb"); + + test_decompress(outFp, inpFp, MESSAGE_MAX_BYTES, RING_BUFFER_BYTES); + + fclose(outFp); + fclose(inpFp); + } + + // verify + { + FILE* inpFp = fopen(inpFilename, "rb"); + FILE* decFp = fopen(decFilename, "rb"); + + const int cmp = compare(inpFp, decFp); + if (0 == cmp) + printf("Verify : OK\n"); + else + printf("Verify : NG\n"); + + fclose(decFp); + fclose(inpFp); + } + + return 0; +} diff --git a/examples/blockStreaming_lineByLine.md b/examples/blockStreaming_lineByLine.md new file mode 100644 index 0000000..4735f92 --- 
/dev/null +++ b/examples/blockStreaming_lineByLine.md @@ -0,0 +1,122 @@ +# LZ4 Streaming API Example : Line by Line Text Compression +by *Takayuki Matsuoka* + +`blockStreaming_lineByLine.c` is an LZ4 Streaming API example which implements line by line incremental (de)compression. + +Please note the following restrictions : + + - Firstly, read "LZ4 Streaming API Basics". + - This is relatively advanced application example. + - Output file is not compatible with lz4frame and platform dependent. + + +## What's the point of this example ? + + - Line by line incremental (de)compression. + - Handle huge file in small amount of memory + - Generally better compression ratio than Block API + - Non-uniform block size + + +## How the compression works + +First of all, allocate "Ring Buffer" for input and LZ4 compressed data buffer for output. + +``` +(1) + Ring Buffer + + +--------+ + | Line#1 | + +---+----+ + | + v + {Out#1} + + +(2) + Prefix Mode Dependency + +----+ + | | + v | + +--------+-+------+ + | Line#1 | Line#2 | + +--------+---+----+ + | + v + {Out#2} + + +(3) + Prefix Prefix + +----+ +----+ + | | | | + v | v | + +--------+-+------+-+------+ + | Line#1 | Line#2 | Line#3 | + +--------+--------+---+----+ + | + v + {Out#3} + + +(4) + External Dictionary Mode + +----+ +----+ + | | | | + v | v | + ------+--------+-+------+-+--------+ + | .... | Line#X | Line#X+1 | + ------+--------+--------+-----+----+ + ^ | + | v + | {Out#X+1} + | + Reset + + +(5) + Prefix + +-----+ + | | + v | + ------+--------+--------+----------+--+-------+ + | .... | Line#X | Line#X+1 | Line#X+2 | + ------+--------+--------+----------+-----+----+ + ^ | + | v + | {Out#X+2} + | + Reset +``` + +Next (see (1)), read first line to ringbuffer and compress it by `LZ4_compress_fast_continue()`. +For the first time, LZ4 doesn't know any previous dependencies, +so it just compresses the line without dependencies and generates compressed line {Out#1} to LZ4 compressed data buffer. 
+After that, write {Out#1} to the file and advance the ringbuffer offset. + +Do the same thing for the second line (see (2)). +But this time, LZ4 can use a dependency on Line#1 to improve the compression ratio. +This dependency is called "Prefix mode". + +Eventually, we'll reach the end of the ringbuffer at Line#X (see (4)). +This time, we should reset the ringbuffer offset. +After resetting, at Line#X+1 the pointer is not adjacent, but LZ4 still maintains its memory. +This is called "External Dictionary Mode". + +In Line#X+2 (see (5)), LZ4 finally forgets almost everything but still remembers Line#X+1. +This is the same situation as Line#2. + +Continue this procedure to the end of the text file. + + +## How the decompression works + +Decompression performs the reverse operation. + + - Read compressed line from the file to buffer. + - Decompress it to the ringbuffer. + - Output decompressed plain text line to the file. + - Advance the ringbuffer offset. If the offset exceeds the end of the ringbuffer, reset it. + +Continue this procedure to the end of the compressed file. 
diff --git a/examples/blockStreaming_ringBuffer.c b/examples/blockStreaming_ringBuffer.c new file mode 100644 index 0000000..0b6a3ce --- /dev/null +++ b/examples/blockStreaming_ringBuffer.c @@ -0,0 +1,190 @@ +/* LZ4 streaming API example : ring buffer + * Based on sample code from Takayuki Matsuoka */ + + +/************************************** + * Compiler Options + **************************************/ +#if defined(_MSC_VER) && (_MSC_VER <= 1800) /* Visual Studio <= 2013 */ +# define _CRT_SECURE_NO_WARNINGS +# define snprintf sprintf_s +#endif + + +/************************************** + * Includes + **************************************/ +#include +#include +#include +#include +#include "lz4.h" + + +enum { + MESSAGE_MAX_BYTES = 1024, + RING_BUFFER_BYTES = 1024 * 8 + MESSAGE_MAX_BYTES, + DECODE_RING_BUFFER = RING_BUFFER_BYTES + MESSAGE_MAX_BYTES /* Intentionally larger, to test unsynchronized ring buffers */ +}; + + +size_t write_int32(FILE* fp, int32_t i) { + return fwrite(&i, sizeof(i), 1, fp); +} + +size_t write_bin(FILE* fp, const void* array, int arrayBytes) { + return fwrite(array, 1, arrayBytes, fp); +} + +size_t read_int32(FILE* fp, int32_t* i) { + return fread(i, sizeof(*i), 1, fp); +} + +size_t read_bin(FILE* fp, void* array, int arrayBytes) { + return fread(array, 1, arrayBytes, fp); +} + + +void test_compress(FILE* outFp, FILE* inpFp) +{ + LZ4_stream_t lz4Stream_body = { { 0 } }; + LZ4_stream_t* lz4Stream = &lz4Stream_body; + + static char inpBuf[RING_BUFFER_BYTES]; + int inpOffset = 0; + + for(;;) { + // Read random length ([1,MESSAGE_MAX_BYTES]) data to the ring buffer. 
+ char* const inpPtr = &inpBuf[inpOffset]; + const int randomLength = (rand() % MESSAGE_MAX_BYTES) + 1; + const int inpBytes = (int) read_bin(inpFp, inpPtr, randomLength); + if (0 == inpBytes) break; + + { +#define CMPBUFSIZE (LZ4_COMPRESSBOUND(MESSAGE_MAX_BYTES)) + char cmpBuf[CMPBUFSIZE]; + const int cmpBytes = LZ4_compress_fast_continue(lz4Stream, inpPtr, cmpBuf, inpBytes, CMPBUFSIZE, 0); + if(cmpBytes <= 0) break; + write_int32(outFp, cmpBytes); + write_bin(outFp, cmpBuf, cmpBytes); + + inpOffset += inpBytes; + + // Wraparound the ringbuffer offset + if(inpOffset >= RING_BUFFER_BYTES - MESSAGE_MAX_BYTES) inpOffset = 0; + } + } + + write_int32(outFp, 0); +} + + +void test_decompress(FILE* outFp, FILE* inpFp) +{ + static char decBuf[DECODE_RING_BUFFER]; + int decOffset = 0; + LZ4_streamDecode_t lz4StreamDecode_body = { { 0 } }; + LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body; + + for(;;) { + int cmpBytes = 0; + char cmpBuf[CMPBUFSIZE]; + + { const size_t r0 = read_int32(inpFp, &cmpBytes); + if(r0 != 1 || cmpBytes <= 0) break; + + const size_t r1 = read_bin(inpFp, cmpBuf, cmpBytes); + if(r1 != (size_t) cmpBytes) break; + } + + { char* const decPtr = &decBuf[decOffset]; + const int decBytes = LZ4_decompress_safe_continue( + lz4StreamDecode, cmpBuf, decPtr, cmpBytes, MESSAGE_MAX_BYTES); + if(decBytes <= 0) break; + decOffset += decBytes; + write_bin(outFp, decPtr, decBytes); + + // Wraparound the ringbuffer offset + if(decOffset >= DECODE_RING_BUFFER - MESSAGE_MAX_BYTES) decOffset = 0; + } + } +} + + +int compare(FILE* f0, FILE* f1) +{ + int result = 0; + + while (0 == result) { + char b0[65536]; + char b1[65536]; + const size_t r0 = fread(b0, 1, sizeof(b0), f0); + const size_t r1 = fread(b1, 1, sizeof(b1), f1); + + result = (int) r0 - (int) r1; + + if (0 == r0 || 0 == r1) break; + + if (0 == result) result = memcmp(b0, b1, r0); + } + + return result; +} + + +int main(int argc, char** argv) +{ + char inpFilename[256] = { 0 }; + char lz4Filename[256] = 
{ 0 }; + char decFilename[256] = { 0 }; + + if (argc < 2) { + printf("Please specify input filename\n"); + return 0; + } + + snprintf(inpFilename, 256, "%s", argv[1]); + snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], 0); + snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], 0); + + printf("inp = [%s]\n", inpFilename); + printf("lz4 = [%s]\n", lz4Filename); + printf("dec = [%s]\n", decFilename); + + // compress + { FILE* const inpFp = fopen(inpFilename, "rb"); + FILE* const outFp = fopen(lz4Filename, "wb"); + + test_compress(outFp, inpFp); + + fclose(outFp); + fclose(inpFp); + } + + // decompress + { FILE* const inpFp = fopen(lz4Filename, "rb"); + FILE* const outFp = fopen(decFilename, "wb"); + + test_decompress(outFp, inpFp); + + fclose(outFp); + fclose(inpFp); + } + + // verify + { FILE* const inpFp = fopen(inpFilename, "rb"); + FILE* const decFp = fopen(decFilename, "rb"); + + const int cmp = compare(inpFp, decFp); + if (0 == cmp) { + printf("Verify : OK\n"); + } else { + printf("Verify : NG\n"); + } + + fclose(decFp); + fclose(inpFp); + } + + return 0; +} diff --git a/examples/compress_functions.c b/examples/compress_functions.c new file mode 100644 index 0000000..7fd6775 --- /dev/null +++ b/examples/compress_functions.c @@ -0,0 +1,363 @@ +/* + * compress_functions.c + * Copyright : Kyle Harper + * License : Follows same licensing as the lz4.c/lz4.h program at any given time. Currently, BSD 2. + * Description: A program to demonstrate the various compression functions involved in when using LZ4_compress_default(). The idea + * is to show how each step in the call stack can be used directly, if desired. There is also some benchmarking for + * each function to demonstrate the (probably lack of) performance difference when jumping the stack. 
+ * (If you're new to lz4, please read simple_buffer.c to understand the fundamentals) + * + * The call stack (before theoretical compiler optimizations) for LZ4_compress_default is as follows: + * LZ4_compress_default + * LZ4_compress_fast + * LZ4_compress_fast_extState + * LZ4_compress_generic + * + * LZ4_compress_default() + * This is the recommended function for compressing data. It will serve as the baseline for comparison. + * LZ4_compress_fast() + * Despite its name, it's not a "fast" version of compression. It simply decides if HEAPMODE is set and either + * allocates memory on the heap for a struct or creates the struct directly on the stack. Stack access is generally + * faster but this function itself isn't giving that advantage, it's just some logic for compile time. + * LZ4_compress_fast_extState() + * This simply accepts all the pointers and values collected thus far and adds logic to determine how + * LZ4_compress_generic should be invoked; specifically: can the source fit into a single pass as determined by + * LZ4_64Klimit. + * LZ4_compress_generic() + * As the name suggests, this is the generic function that ultimately does most of the heavy lifting. Calling this + * directly can help avoid some test cases and branching which might be useful in some implementation-specific + * situations, but you really need to know what you're doing AND what you're asking lz4 to do! You also need a + * wrapper function because this function isn't exposed with lz4.h. + * + * The call stack for decompression functions is shallow. There are 2 options: + * LZ4_decompress_safe || LZ4_decompress_fast + * LZ4_decompress_generic + * + * LZ4_decompress_safe + * This is the recommended function for decompressing data. It is considered safe because the caller specifies + * both the size of the compressed buffer to read as well as the maximum size of the output (decompressed) buffer + * instead of just the latter. 
+ * LZ4_decompress_fast + * Again, despite its name it's not a "fast" version of decompression. It simply frees the caller of sending the + * size of the compressed buffer (it will simply be read-to-end, hence it's non-safety). + * LZ4_decompress_generic + * This is the generic function that both of the LZ4_decompress_* functions above end up calling. Calling this + * directly is not advised, period. Furthermore, it is a static inline function in lz4.c, so there isn't a symbol + * exposed for anyone using lz4.h to utilize. + * + * Special Note About Decompression: + * Using the LZ4_decompress_safe() function protects against malicious (user) input. If you are using data from a + * trusted source, or if your program is the producer (P) as well as its consumer (C) in a PC or MPMC setup, you can + * safely use the LZ4_decompress_fast function + */ + +/* Since lz4 compiles with c99 and not gnu/std99 we need to enable POSIX linking for time.h structs and functions. */ +#if __STDC_VERSION__ >= 199901L +#define _XOPEN_SOURCE 600 +#else +#define _XOPEN_SOURCE 500 +#endif +#define _POSIX_C_SOURCE 199309L + +/* Includes, for Power! */ +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* LZ4_decompress_fast */ +#include "lz4.h" +#include /* for printf() */ +#include /* for exit() */ +#include /* for atoi() memcmp() */ +#include /* for uint_types */ +#include /* for PRIu64 */ +#include /* for clock_gettime() */ +#include /* for setlocale() */ + +/* We need to know what one billion is for clock timing. */ +#define BILLION 1000000000L + +/* Create a crude set of test IDs so we can switch on them later (Can't switch() on a char[] or char*). */ +#define ID__LZ4_COMPRESS_DEFAULT 1 +#define ID__LZ4_COMPRESS_FAST 2 +#define ID__LZ4_COMPRESS_FAST_EXTSTATE 3 +#define ID__LZ4_COMPRESS_GENERIC 4 +#define ID__LZ4_DECOMPRESS_SAFE 5 +#define ID__LZ4_DECOMPRESS_FAST 6 + + + +/* + * Easy show-error-and-bail function. 
+ */ +void run_screaming(const char *message, const int code) { + printf("%s\n", message); + exit(code); +} + + +/* + * Centralize the usage function to keep main cleaner. + */ +void usage(const char *message) { + printf("Usage: ./argPerformanceTesting \n"); + run_screaming(message, 1); + return; +} + + + +/* + * Runs the benchmark for LZ4_compress_* based on function_id. + */ +uint64_t bench( + const char *known_good_dst, + const int function_id, + const int iterations, + const char *src, + char *dst, + const size_t src_size, + const size_t max_dst_size, + const size_t comp_size + ) { + uint64_t time_taken = 0; + int rv = 0; + const int warm_up = 5000; + struct timespec start, end; + const int acceleration = 1; + LZ4_stream_t state; + + // Select the right function to perform the benchmark on. We perform 5000 initial loops to warm the cache and ensure that dst + // remains matching to known_good_dst between successive calls. + switch(function_id) { + case ID__LZ4_COMPRESS_DEFAULT: + printf("Starting benchmark for function: LZ4_compress_default()\n"); + for(int junk=0; junk 1) + iterations = atoi(argv[1]); + if (iterations < 1) + usage("Argument 1 (iterations) must be > 0."); + + // First we will create 2 sources (char *) of 2000 bytes each. One normal text, the other highly-compressible text. + const char *src = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed luctus purus et risus vulputate, et mollis orci ullamcorper. Nulla facilisi. Fusce in ligula sed purus varius aliquet interdum vitae justo. Proin quis diam velit. Nulla varius iaculis auctor. Cras volutpat, justo eu dictum pulvinar, elit sem porttitor metus, et imperdiet metus sapien et ante. Nullam nisi nulla, ornare eu tristique eu, dignissim vitae diam. Nulla sagittis porta libero, a accumsan felis sagittis scelerisque. Integer laoreet eleifend congue. Etiam rhoncus leo vel dolor fermentum, quis luctus nisl iaculis. Praesent a erat sapien. Aliquam semper mi in lorem ultrices ultricies. 
Lorem ipsum dolor sit amet, consectetur adipiscing elit. In feugiat risus sed enim ultrices, at sodales nulla tristique. Maecenas eget pellentesque justo, sed pellentesque lectus. Fusce sagittis sit amet elit vel varius. Donec sed ligula nec ligula vulputate rutrum sed ut lectus. Etiam congue pharetra leo vitae cursus. Morbi enim ante, porttitor ut varius vel, tincidunt quis justo. Nunc iaculis, risus id ultrices semper, metus est efficitur ligula, vel posuere risus nunc eget purus. Ut lorem turpis, condimentum at sem sed, porta aliquam turpis. In ut sapien a nulla dictum tincidunt quis sit amet lorem. Fusce at est egestas, luctus neque eu, consectetur tortor. Phasellus eleifend ultricies nulla ac lobortis. Morbi maximus quam cursus vehicula iaculis. Maecenas cursus vel justo ut rutrum. Curabitur magna orci, dignissim eget dapibus vitae, finibus id lacus. Praesent rhoncus mattis augue vitae bibendum. Praesent porta mauris non ultrices fermentum. Quisque vulputate ipsum in sodales pulvinar. Aliquam nec mollis felis. Donec vitae augue pulvinar, congue nisl sed, pretium purus. Fusce lobortis mi ac neque scelerisque semper. Pellentesque vel est vitae magna aliquet aliquet. Nam non dolor. Nulla facilisi. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. 
Morbi ac lacinia felis metus."; + const char *hc_src = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; + // Set and derive sizes. Since we're using strings, use strlen() + 1 for \0. + const size_t src_size = strlen(src) + 1; + const size_t max_dst_size = LZ4_compressBound(src_size); + int bytes_returned = 0; + // Now build allocations for the data we'll be playing with. + char *dst = calloc(1, max_dst_size); + char *known_good_dst = calloc(1, max_dst_size); + char *known_good_hc_dst = calloc(1, max_dst_size); + if (dst == NULL || known_good_dst == NULL || known_good_hc_dst == NULL) + run_screaming("Couldn't allocate memory for the destination buffers. Sad :(", 1); + + // Create known-good buffers to verify our tests with other functions will produce the same results. + bytes_returned = LZ4_compress_default(src, known_good_dst, src_size, max_dst_size); + if (bytes_returned < 1) + run_screaming("Couldn't create a known-good destination buffer for comparison... this is bad.", 1); + const size_t src_comp_size = bytes_returned; + bytes_returned = LZ4_compress_default(hc_src, known_good_hc_dst, src_size, max_dst_size); + if (bytes_returned < 1) + run_screaming("Couldn't create a known-good (highly compressible) destination buffer for comparison... this is bad.", 1); + const size_t hc_src_comp_size = bytes_returned; + + + /* LZ4_compress_default() */ + // This is the default function so we don't need to demonstrate how to use it. See basics.c if you need more basal information. + + /* LZ4_compress_fast() */ + // Using this function is identical to LZ4_compress_default except we need to specify an "acceleration" value. Defaults to 1. + memset(dst, 0, max_dst_size); + bytes_returned = LZ4_compress_fast(src, dst, src_size, max_dst_size, 1); + if (bytes_returned < 1) + run_screaming("Failed to compress src using LZ4_compress_fast. echo $? 
for return code.", bytes_returned); + if (memcmp(dst, known_good_dst, bytes_returned) != 0) + run_screaming("According to memcmp(), the value we got in dst from LZ4_compress_fast doesn't match the known-good value. This is bad.", 1); + + /* LZ4_compress_fast_extState() */ + // Using this function directly requires that we build an LZ4_stream_t struct ourselves. We do NOT have to reset it ourselves. + memset(dst, 0, max_dst_size); + LZ4_stream_t state; + bytes_returned = LZ4_compress_fast_extState(&state, src, dst, src_size, max_dst_size, 1); + if (bytes_returned < 1) + run_screaming("Failed to compress src using LZ4_compress_fast_extState. echo $? for return code.", bytes_returned); + if (memcmp(dst, known_good_dst, bytes_returned) != 0) + run_screaming("According to memcmp(), the value we got in dst from LZ4_compress_fast_extState doesn't match the known-good value. This is bad.", 1); + + /* LZ4_compress_generic */ + // When you can exactly control the inputs and options of your LZ4 needs, you can use LZ4_compress_generic and fixed (const) + // values for the enum types such as dictionary and limitations. Any other direct-use is probably a bad idea. + // + // That said, the LZ4_compress_generic() function is 'static inline' and does not have a prototype in lz4.h to expose a symbol + // for it. In other words: we can't access it directly. I don't want to submit a PR that modifies lz4.c/h. Yann and others can + // do that if they feel it's worth expanding this example. + // + // I will, however, leave a skeleton of what would be required to use it directly: + /* + memset(dst, 0, max_dst_size); + // LZ4_stream_t state: is already declared above. We can reuse it BUT we have to reset the stream ourselves between each call. + LZ4_resetStream((LZ4_stream_t *)&state); + // Since src size is small we know the following enums will be used: notLimited (0), byU16 (2), noDict (0), noDictIssue (0). 
+ bytes_returned = LZ4_compress_generic(&state, src, dst, src_size, max_dst_size, notLimited, byU16, noDict, noDictIssue, 1); + if (bytes_returned < 1) + run_screaming("Failed to compress src using LZ4_compress_generic. echo $? for return code.", bytes_returned); + if (memcmp(dst, known_good_dst, bytes_returned) != 0) + run_screaming("According to memcmp(), the value we got in dst from LZ4_compress_generic doesn't match the known-good value. This is bad.", 1); + */ + + + /* Benchmarking */ + /* Now we'll run a few rudimentary benchmarks with each function to demonstrate differences in speed based on the function used. + * Remember, we cannot call LZ4_compress_generic() directly (yet) so it's disabled. + */ + // Suite A - Normal Compressibility + char *dst_d = calloc(1, src_size); + memset(dst, 0, max_dst_size); + printf("\nStarting suite A: Normal compressible text.\n"); + uint64_t time_taken__default = bench(known_good_dst, ID__LZ4_COMPRESS_DEFAULT, iterations, src, dst, src_size, max_dst_size, src_comp_size); + uint64_t time_taken__fast = bench(known_good_dst, ID__LZ4_COMPRESS_FAST, iterations, src, dst, src_size, max_dst_size, src_comp_size); + uint64_t time_taken__fast_extstate = bench(known_good_dst, ID__LZ4_COMPRESS_FAST_EXTSTATE, iterations, src, dst, src_size, max_dst_size, src_comp_size); + //uint64_t time_taken__generic = bench(known_good_dst, ID__LZ4_COMPRESS_GENERIC, iterations, src, dst, src_size, max_dst_size, src_comp_size); + uint64_t time_taken__decomp_safe = bench(src, ID__LZ4_DECOMPRESS_SAFE, iterations, known_good_dst, dst_d, src_size, max_dst_size, src_comp_size); + uint64_t time_taken__decomp_fast = bench(src, ID__LZ4_DECOMPRESS_FAST, iterations, known_good_dst, dst_d, src_size, max_dst_size, src_comp_size); + // Suite B - Highly Compressible + memset(dst, 0, max_dst_size); + printf("\nStarting suite B: Highly compressible text.\n"); + uint64_t time_taken_hc__default = bench(known_good_hc_dst, ID__LZ4_COMPRESS_DEFAULT, iterations, hc_src, dst, 
src_size, max_dst_size, hc_src_comp_size); + uint64_t time_taken_hc__fast = bench(known_good_hc_dst, ID__LZ4_COMPRESS_FAST, iterations, hc_src, dst, src_size, max_dst_size, hc_src_comp_size); + uint64_t time_taken_hc__fast_extstate = bench(known_good_hc_dst, ID__LZ4_COMPRESS_FAST_EXTSTATE, iterations, hc_src, dst, src_size, max_dst_size, hc_src_comp_size); + //uint64_t time_taken_hc__generic = bench(known_good_hc_dst, ID__LZ4_COMPRESS_GENERIC, iterations, hc_src, dst, src_size, max_dst_size, hc_src_comp_size); + uint64_t time_taken_hc__decomp_safe = bench(hc_src, ID__LZ4_DECOMPRESS_SAFE, iterations, known_good_hc_dst, dst_d, src_size, max_dst_size, hc_src_comp_size); + uint64_t time_taken_hc__decomp_fast = bench(hc_src, ID__LZ4_DECOMPRESS_FAST, iterations, known_good_hc_dst, dst_d, src_size, max_dst_size, hc_src_comp_size); + + // Report and leave. + setlocale(LC_ALL, ""); + const char *format = "|%-14s|%-30s|%'14.9f|%'16d|%'14d|%'13.2f%%|\n"; + const char *header_format = "|%-14s|%-30s|%14s|%16s|%14s|%14s|\n"; + const char *separator = "+--------------+------------------------------+--------------+----------------+--------------+--------------+\n"; + printf("\n"); + printf("%s", separator); + printf(header_format, "Source", "Function Benchmarked", "Total Seconds", "Iterations/sec", "ns/Iteration", "% of default"); + printf("%s", separator); + printf(format, "Normal Text", "LZ4_compress_default()", (double)time_taken__default / BILLION, (int)(iterations / ((double)time_taken__default /BILLION)), (int)time_taken__default / iterations, (double)time_taken__default * 100 / time_taken__default); + printf(format, "Normal Text", "LZ4_compress_fast()", (double)time_taken__fast / BILLION, (int)(iterations / ((double)time_taken__fast /BILLION)), (int)time_taken__fast / iterations, (double)time_taken__fast * 100 / time_taken__default); + printf(format, "Normal Text", "LZ4_compress_fast_extState()", (double)time_taken__fast_extstate / BILLION, (int)(iterations / 
((double)time_taken__fast_extstate /BILLION)), (int)time_taken__fast_extstate / iterations, (double)time_taken__fast_extstate * 100 / time_taken__default); + //printf(format, "Normal Text", "LZ4_compress_generic()", (double)time_taken__generic / BILLION, (int)(iterations / ((double)time_taken__generic /BILLION)), (int)time_taken__generic / iterations, (double)time_taken__generic * 100 / time_taken__default); + printf(format, "Normal Text", "LZ4_decompress_safe()", (double)time_taken__decomp_safe / BILLION, (int)(iterations / ((double)time_taken__decomp_safe /BILLION)), (int)time_taken__decomp_safe / iterations, (double)time_taken__decomp_safe * 100 / time_taken__default); + printf(format, "Normal Text", "LZ4_decompress_fast()", (double)time_taken__decomp_fast / BILLION, (int)(iterations / ((double)time_taken__decomp_fast /BILLION)), (int)time_taken__decomp_fast / iterations, (double)time_taken__decomp_fast * 100 / time_taken__default); + printf(header_format, "", "", "", "", "", ""); + printf(format, "Compressible", "LZ4_compress_default()", (double)time_taken_hc__default / BILLION, (int)(iterations / ((double)time_taken_hc__default /BILLION)), (int)time_taken_hc__default / iterations, (double)time_taken_hc__default * 100 / time_taken_hc__default); + printf(format, "Compressible", "LZ4_compress_fast()", (double)time_taken_hc__fast / BILLION, (int)(iterations / ((double)time_taken_hc__fast /BILLION)), (int)time_taken_hc__fast / iterations, (double)time_taken_hc__fast * 100 / time_taken_hc__default); + printf(format, "Compressible", "LZ4_compress_fast_extState()", (double)time_taken_hc__fast_extstate / BILLION, (int)(iterations / ((double)time_taken_hc__fast_extstate /BILLION)), (int)time_taken_hc__fast_extstate / iterations, (double)time_taken_hc__fast_extstate * 100 / time_taken_hc__default); + //printf(format, "Compressible", "LZ4_compress_generic()", (double)time_taken_hc__generic / BILLION, (int)(iterations / ((double)time_taken_hc__generic /BILLION)), 
(int)time_taken_hc__generic / iterations, (double)time_taken_hc__generic * 100 / time_taken_hc__default); + printf(format, "Compressible", "LZ4_decompress_safe()", (double)time_taken_hc__decomp_safe / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_safe /BILLION)), (int)time_taken_hc__decomp_safe / iterations, (double)time_taken_hc__decomp_safe * 100 / time_taken_hc__default); + printf(format, "Compressible", "LZ4_decompress_fast()", (double)time_taken_hc__decomp_fast / BILLION, (int)(iterations / ((double)time_taken_hc__decomp_fast /BILLION)), (int)time_taken_hc__decomp_fast / iterations, (double)time_taken_hc__decomp_fast * 100 / time_taken_hc__default); + printf("%s", separator); + printf("\n"); + printf("All done, ran %d iterations per test.\n", iterations); + return 0; +} diff --git a/examples/dictionaryRandomAccess.c b/examples/dictionaryRandomAccess.c new file mode 100644 index 0000000..ecb3b2d --- /dev/null +++ b/examples/dictionaryRandomAccess.c @@ -0,0 +1,280 @@ +// LZ4 API example : Dictionary Random Access + +#if defined(_MSC_VER) && (_MSC_VER <= 1800) /* Visual Studio <= 2013 */ +# define _CRT_SECURE_NO_WARNINGS +# define snprintf sprintf_s +#endif +#include "lz4.h" + +#include +#include +#include +#include + +#define MIN(x, y) ((x) < (y) ? (x) : (y)) + +enum { + BLOCK_BYTES = 1024, /* 1 KiB of uncompressed data in a block */ + DICTIONARY_BYTES = 1024, /* Load a 1 KiB dictionary */ + MAX_BLOCKS = 1024 /* For simplicity of implementation */ +}; + +/** + * Magic bytes for this test case. + * This is not a great magic number because it is a common word in ASCII. + * However, it is important to have some versioning system in your format. 
+ */
+const char kTestMagic[] = { 'T', 'E', 'S', 'T' };
+
+/* stdio helpers : each one terminates the process with its own exit status on failure. */
+void write_int(FILE* fp, int i) {   /* stored with the platform's native int size and byte order */
+    size_t written = fwrite(&i, sizeof(i), 1, fp);
+    if (written != 1) { exit(10); }
+}
+
+void write_bin(FILE* fp, const void* array, size_t arrayBytes) {
+    size_t written = fwrite(array, 1, arrayBytes, fp);
+    if (written != arrayBytes) { exit(11); }
+}
+
+void read_int(FILE* fp, int* i) {
+    size_t read = fread(i, sizeof(*i), 1, fp);
+    if (read != 1) { exit(12); }
+}
+
+size_t read_bin(FILE* fp, void* array, size_t arrayBytes) {   /* a short count (EOF) is returned to the caller; only a stream error is fatal */
+    size_t read = fread(array, 1, arrayBytes, fp);
+    if (ferror(fp)) { exit(12); }
+    return read;
+}
+
+void seek_bin(FILE* fp, long offset, int origin) {
+    if (fseek(fp, offset, origin)) { exit(14); }
+}
+
+/* test_compress() : compress inpFp into outFp in blocks of BLOCK_BYTES, reloading `dict` before each block, then append the offset table. */
+void test_compress(FILE* outFp, FILE* inpFp, void *dict, int dictSize)
+{
+    LZ4_stream_t lz4Stream_body;
+    LZ4_stream_t* lz4Stream = &lz4Stream_body;
+
+    char inpBuf[BLOCK_BYTES];
+    int offsets[MAX_BLOCKS + 1];   /* +1 : the slot stored just before the MAX_BLOCKS check below trips must still be in bounds */
+    int *offsetsEnd = offsets;
+
+
+    LZ4_initStream(lz4Stream, sizeof(*lz4Stream));
+
+    /* Write header magic */
+    write_bin(outFp, kTestMagic, sizeof(kTestMagic));
+
+    *offsetsEnd++ = (int)sizeof(kTestMagic);   /* the first block starts right after the magic */
+    /* Write compressed data blocks. Each block contains BLOCK_BYTES of plain
+       data except possibly the last.
+     */
+    for(;;) {
+        const int inpBytes = (int) read_bin(inpFp, inpBuf, BLOCK_BYTES);
+        if(0 == inpBytes) {
+            break;
+        }
+
+        /* Forget previously compressed data and load the dictionary */
+        LZ4_loadDict(lz4Stream, dict, dictSize);
+        {
+            char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
+            const int cmpBytes = LZ4_compress_fast_continue(
+                lz4Stream, inpBuf, cmpBuf, inpBytes, sizeof(cmpBuf), 1);
+            if(cmpBytes <= 0) { exit(1); }
+            write_bin(outFp, cmpBuf, (size_t)cmpBytes);
+            /* Keep track of the offsets : each entry is the end of the block just written */
+            *offsetsEnd = *(offsetsEnd - 1) + cmpBytes;
+            ++offsetsEnd;
+        }
+        if (offsetsEnd - offsets > MAX_BLOCKS) { exit(2); }
+    }
+    /* Write the trailing jump table : every offset, followed by the number of offsets */
+    {
+        int *ptr = offsets;
+        while (ptr != offsetsEnd) {
+            write_int(outFp, *ptr++);
+        }
+        write_int(outFp, (int)(offsetsEnd - offsets));
+    }
+}
+
+/* test_decompress() : write to outFp the `length` plain bytes starting at plain position `offset`, decoding only the blocks that hold them. */
+void test_decompress(FILE* outFp, FILE* inpFp, void *dict, int dictSize, int offset, int length)
+{
+    LZ4_streamDecode_t lz4StreamDecode_body;
+    LZ4_streamDecode_t* lz4StreamDecode = &lz4StreamDecode_body;
+
+    /* The blocks [currentBlock, endBlock) contain the data we want */
+    int currentBlock = offset / BLOCK_BYTES;
+    int endBlock = ((offset + length - 1) / BLOCK_BYTES) + 1;
+
+    char decBuf[BLOCK_BYTES];
+    int offsets[MAX_BLOCKS];
+
+    /* Special cases : nothing to extract; a negative offset would also index offsets[] below zero */
+    if (offset < 0 || length <= 0) { return; }
+
+    /* Read the magic bytes */
+    {
+        char magic[sizeof(kTestMagic)];
+        size_t read = read_bin(inpFp, magic, sizeof(magic));
+        if (read != sizeof(magic)) { exit(1); }
+        if (memcmp(kTestMagic, magic, sizeof(magic))) { exit(2); }
+    }
+
+    /* Read the offsets tail (entries have the size write_int() stored them with) */
+    {
+        int numOffsets;
+        int block;
+        int *offsetsPtr = offsets;
+        seek_bin(inpFp, -(long)sizeof(int), SEEK_END);
+        read_int(inpFp, &numOffsets);
+        if (numOffsets <= endBlock) { exit(3); }   /* request reaches past the last block */
+        seek_bin(inpFp, -(long)sizeof(int) * (numOffsets + 1), SEEK_END);
+        for (block = 0; block <= endBlock; ++block) {
+            read_int(inpFp, offsetsPtr++);
+        }
+    }
+    /* Seek to the first block to read */
+    seek_bin(inpFp, offsets[currentBlock], SEEK_SET);
+    offset = offset % BLOCK_BYTES;   /* from here on : position inside the current block */
+
+    /* Start decoding */
+    for(; currentBlock < endBlock; ++currentBlock) {
+        char cmpBuf[LZ4_COMPRESSBOUND(BLOCK_BYTES)];
+        /* The difference in offsets is the size of the block */
+        int cmpBytes = offsets[currentBlock + 1] - offsets[currentBlock];
+        {
+            const size_t read = read_bin(inpFp, cmpBuf, (size_t)cmpBytes);
+            if(read != (size_t)cmpBytes) { exit(4); }
+        }
+
+        /* Load the dictionary */
+        LZ4_setStreamDecode(lz4StreamDecode, dict, dictSize);
+        {
+            const int decBytes = LZ4_decompress_safe_continue(
+                lz4StreamDecode, cmpBuf, decBuf, cmpBytes, BLOCK_BYTES);
+            if(decBytes <= 0) { exit(5); }
+            {
+                /* Write out the part of the data we care about (nothing when offset lies past a short final block) */
+                int blockLength = (offset < decBytes) ? MIN(length, (decBytes - offset)) : 0;
+                write_bin(outFp, decBuf + offset, (size_t)blockLength);
+                offset = 0;
+                length -= blockLength;
+            }
+        }
+    }
+}
+
+/* compare() : compare up to `length` bytes read from both streams; returns 0 when they match, non-zero otherwise. */
+int compare(FILE* fp0, FILE* fp1, int length)
+{
+    int result = 0;
+
+    while(0 == result && length > 0) {   /* a non-positive length must never reach the size_t conversion below */
+        char b0[4096];
+        char b1[4096];
+        const size_t r0 = read_bin(fp0, b0, (size_t)MIN(length, (int)sizeof(b0)));
+        const size_t r1 = read_bin(fp1, b1, (size_t)MIN(length, (int)sizeof(b1)));
+
+        result = (int) r0 - (int) r1;
+
+        if(0 == r0 || 0 == r1) {
+            break;
+        }
+        if(0 == result) {
+            result = memcmp(b0, b1, r0);
+        }
+        length -= (int)r0;
+    }
+
+    return result;
+}
+
+/* main() : compress argv[1] with dictionary argv[2], extract `length` bytes at `offset`, and compare them with the input. */
+int main(int argc, char* argv[])
+{
+    char inpFilename[256] = { 0 };
+    char lz4Filename[256] = { 0 };
+    char decFilename[256] = { 0 };
+    char dictFilename[256] = { 0 };
+    int offset;
+    int length;
+    char dict[DICTIONARY_BYTES];
+    int dictSize;
+
+    if(argc < 5) {
+        printf("Usage: %s input dictionary offset length\n", argv[0]);
+        return 0;
+    }
+
+    snprintf(inpFilename, 256, "%s", argv[1]);
+    snprintf(lz4Filename, 256, "%s.lz4s-%d", argv[1], BLOCK_BYTES);
+    snprintf(decFilename, 256, "%s.lz4s-%d.dec", argv[1], BLOCK_BYTES);
+    snprintf(dictFilename, 256, "%s", argv[2]);
+    offset = atoi(argv[3]);
+    length = atoi(argv[4]);
+
+    printf("inp = [%s]\n", inpFilename);
+    printf("lz4 = [%s]\n", lz4Filename);
+    printf("dec = [%s]\n", decFilename);
+    printf("dict = [%s]\n", dictFilename);
+    printf("offset = [%d]\n", offset);
+    printf("length = [%d]\n", length);
+
+    /* Load dictionary : dictSize is the number of bytes actually read, at most DICTIONARY_BYTES */
+    {   FILE* dictFp = fopen(dictFilename, "rb");
+        if (dictFp == NULL) { fprintf(stderr, "cannot open %s\n", dictFilename); return 1; }
+        dictSize = (int)read_bin(dictFp, dict, DICTIONARY_BYTES);
+        fclose(dictFp);
+    }
+
+    /* compress */
+    {
+        FILE* inpFp = fopen(inpFilename, "rb");
+        FILE* outFp = fopen(lz4Filename, "wb");
+        if (inpFp == NULL || outFp == NULL) { fprintf(stderr, "cannot open %s or %s\n", inpFilename, lz4Filename); return 1; }
+        printf("compress : %s -> %s\n", inpFilename, lz4Filename);
+        test_compress(outFp, inpFp, dict, dictSize);
+        printf("compress : done\n");
+
+        fclose(outFp);
+        fclose(inpFp);
+    }
+
+    /* decompress : must use the same dictionary bytes as compression, i.e. dictSize and not the buffer capacity */
+    {
+        FILE* inpFp = fopen(lz4Filename, "rb");
+        FILE* outFp = fopen(decFilename, "wb");
+        if (inpFp == NULL || outFp == NULL) { fprintf(stderr, "cannot open %s or %s\n", lz4Filename, decFilename); return 1; }
+        printf("decompress : %s -> %s\n", lz4Filename, decFilename);
+        test_decompress(outFp, inpFp, dict, dictSize, offset, length);
+        printf("decompress : done\n");
+
+        fclose(outFp);
+        fclose(inpFp);
+    }
+
+    /* verify */
+    {
+        FILE* inpFp = fopen(inpFilename, "rb");
+        FILE* decFp = fopen(decFilename, "rb");
+        if (inpFp == NULL || decFp == NULL) { fprintf(stderr, "cannot open %s or %s\n", inpFilename, decFilename); return 1; }
+        seek_bin(inpFp, offset, SEEK_SET);
+        printf("verify : %s <-> %s\n", inpFilename, decFilename);
+        const int cmp = compare(inpFp, decFp, length);
+        if(0 == cmp) {
+            printf("verify : OK\n");
+        } else {
+            printf("verify : NG\n");
+        }
+
+        fclose(decFp);
+        fclose(inpFp);
+    }
+
+    return 0;
+}
diff --git a/examples/dictionaryRandomAccess.md b/examples/dictionaryRandomAccess.md
new file mode 100644
index 0000000..53d825d
--- /dev/null
+++ b/examples/dictionaryRandomAccess.md
@@ -0,0 +1,67 @@
+# LZ4 API Example : Dictionary Random Access
+
+`dictionaryRandomAccess.c` is LZ4 API example which implements dictionary compression and random access decompression.
+
+Please note that the output file is not compatible with lz4frame and is platform dependent.
+
+
+## What's the point of this example ?
+
+ - Dictionary based compression for homogenous files.
+ - Random access to compressed blocks.
+ + +## How the compression works + +Reads the dictionary from a file, and uses it as the history for each block. +This allows each block to be independent, but maintains compression ratio. + +``` + Dictionary + + + | + v + +---------+ + | Block#1 | + +----+----+ + | + v + {Out#1} + + + Dictionary + + + | + v + +---------+ + | Block#2 | + +----+----+ + | + v + {Out#2} +``` + +After writing the magic bytes `TEST` and then the compressed blocks, write out the jump table. +The last 4 bytes are an integer containing the number of offsets in the jump table (`N + 1` for `N` blocks). +If there are `N` blocks, then just before the last 4 bytes are `N + 1` 4-byte integers containing the offsets at the beginning and end of each block. +Let `Offset#K` be the total number of bytes written after writing out `Block#K` *including* the magic bytes for simplicity. + +``` ++------+---------+ +---------+---+----------+ +----------+-----+ +| TEST | Block#1 | ... | Block#N | 4 | Offset#1 | ... | Offset#N | N+1 | ++------+---------+ +---------+---+----------+ +----------+-----+ +``` + +## How the decompression works + +Decompression works in the reverse order. + + - Seek to the last 4 bytes of the file and read the number of offsets. + - Read each offset into an array. + - Seek to the first block containing data we want to read. + We know where to look because we know each block contains a fixed amount of uncompressed data, except possibly the last. + - Decompress it and write what data we need from it to the file. + - Read the next block. + - Decompress it and write that page to the file. + +Continue this procedure until all the required data has been read. 
diff --git a/examples/frameCompress.c b/examples/frameCompress.c new file mode 100644 index 0000000..aac4a3b --- /dev/null +++ b/examples/frameCompress.c @@ -0,0 +1,401 @@ +/* LZ4frame API example : compress a file + * Modified from an example code by Zbigniew Jędrzejewski-Szmek + * + * This example streams an input file into an output file + * using a bounded memory budget. + * Input is read in chunks of IN_CHUNK_SIZE */ + +#include +#include +#include +#include +#include + +#include + + +#define IN_CHUNK_SIZE (16*1024) + +static const LZ4F_preferences_t kPrefs = { + { LZ4F_max256KB, LZ4F_blockLinked, LZ4F_noContentChecksum, LZ4F_frame, + 0 /* unknown content size */, 0 /* no dictID */ , LZ4F_noBlockChecksum }, + 0, /* compression level; 0 == default */ + 0, /* autoflush */ + 0, /* favor decompression speed */ + { 0, 0, 0 }, /* reserved, must be set to 0 */ +}; + + +/* safe_fwrite() : + * performs fwrite(), ensure operation success, or immediately exit() */ +static void safe_fwrite(void* buf, size_t eltSize, size_t nbElt, FILE* f) +{ + size_t const writtenSize = fwrite(buf, eltSize, nbElt, f); + size_t const expectedSize = eltSize * nbElt; + if (nbElt>0) assert(expectedSize / nbElt == eltSize); /* check overflow */ + if (writtenSize < expectedSize) { + if (ferror(f)) /* note : ferror() must follow fwrite */ + fprintf(stderr, "Write failed \n"); + else + fprintf(stderr, "Write too short \n"); + exit(1); + } +} + + +/* ================================================= */ +/* Streaming Compression example */ +/* ================================================= */ + +typedef struct { + int error; + unsigned long long size_in; + unsigned long long size_out; +} compressResult_t; + +static compressResult_t +compress_file_internal(FILE* f_in, FILE* f_out, + LZ4F_compressionContext_t ctx, + void* inBuff, size_t inChunkSize, + void* outBuff, size_t outCapacity) +{ + compressResult_t result = { 1, 0, 0 }; /* result for an error */ + unsigned long long count_in = 0, 
count_out; + + assert(f_in != NULL); assert(f_out != NULL); + assert(ctx != NULL); + assert(outCapacity >= LZ4F_HEADER_SIZE_MAX); + assert(outCapacity >= LZ4F_compressBound(inChunkSize, &kPrefs)); + + /* write frame header */ + { size_t const headerSize = LZ4F_compressBegin(ctx, outBuff, outCapacity, &kPrefs); + if (LZ4F_isError(headerSize)) { + printf("Failed to start compression: error %u \n", (unsigned)headerSize); + return result; + } + count_out = headerSize; + printf("Buffer size is %u bytes, header size %u bytes \n", + (unsigned)outCapacity, (unsigned)headerSize); + safe_fwrite(outBuff, 1, headerSize, f_out); + } + + /* stream file */ + for (;;) { + size_t const readSize = fread(inBuff, 1, IN_CHUNK_SIZE, f_in); + if (readSize == 0) break; /* nothing left to read from input file */ + count_in += readSize; + + size_t const compressedSize = LZ4F_compressUpdate(ctx, + outBuff, outCapacity, + inBuff, readSize, + NULL); + if (LZ4F_isError(compressedSize)) { + printf("Compression failed: error %u \n", (unsigned)compressedSize); + return result; + } + + printf("Writing %u bytes\n", (unsigned)compressedSize); + safe_fwrite(outBuff, 1, compressedSize, f_out); + count_out += compressedSize; + } + + /* flush whatever remains within internal buffers */ + { size_t const compressedSize = LZ4F_compressEnd(ctx, + outBuff, outCapacity, + NULL); + if (LZ4F_isError(compressedSize)) { + printf("Failed to end compression: error %u \n", (unsigned)compressedSize); + return result; + } + + printf("Writing %u bytes \n", (unsigned)compressedSize); + safe_fwrite(outBuff, 1, compressedSize, f_out); + count_out += compressedSize; + } + + result.size_in = count_in; + result.size_out = count_out; + result.error = 0; + return result; +} + +static compressResult_t +compress_file(FILE* f_in, FILE* f_out) +{ + assert(f_in != NULL); + assert(f_out != NULL); + + /* ressource allocation */ + LZ4F_compressionContext_t ctx; + size_t const ctxCreation = LZ4F_createCompressionContext(&ctx, 
LZ4F_VERSION); + void* const src = malloc(IN_CHUNK_SIZE); + size_t const outbufCapacity = LZ4F_compressBound(IN_CHUNK_SIZE, &kPrefs); /* large enough for any input <= IN_CHUNK_SIZE */ + void* const outbuff = malloc(outbufCapacity); + + compressResult_t result = { 1, 0, 0 }; /* == error (default) */ + if (!LZ4F_isError(ctxCreation) && src && outbuff) { + result = compress_file_internal(f_in, f_out, + ctx, + src, IN_CHUNK_SIZE, + outbuff, outbufCapacity); + } else { + printf("error : ressource allocation failed \n"); + } + + LZ4F_freeCompressionContext(ctx); /* supports free on NULL */ + free(src); + free(outbuff); + return result; +} + + +/* ================================================= */ +/* Streaming decompression example */ +/* ================================================= */ + +static size_t get_block_size(const LZ4F_frameInfo_t* info) { + switch (info->blockSizeID) { + case LZ4F_default: + case LZ4F_max64KB: return 1 << 16; + case LZ4F_max256KB: return 1 << 18; + case LZ4F_max1MB: return 1 << 20; + case LZ4F_max4MB: return 1 << 22; + default: + printf("Impossible with expected frame specification (<=v1.6.1)\n"); + exit(1); + } +} + +/* @return : 1==error, 0==success */ +static int +decompress_file_internal(FILE* f_in, FILE* f_out, + LZ4F_dctx* dctx, + void* src, size_t srcCapacity, size_t filled, size_t alreadyConsumed, + void* dst, size_t dstCapacity) +{ + int firstChunk = 1; + size_t ret = 1; + + assert(f_in != NULL); assert(f_out != NULL); + assert(dctx != NULL); + assert(src != NULL); assert(srcCapacity > 0); assert(filled <= srcCapacity); assert(alreadyConsumed <= filled); + assert(dst != NULL); assert(dstCapacity > 0); + + /* Decompression */ + while (ret != 0) { + /* Load more input */ + size_t readSize = firstChunk ? 
filled : fread(src, 1, srcCapacity, f_in); firstChunk=0; + const void* srcPtr = (const char*)src + alreadyConsumed; alreadyConsumed=0; + const void* const srcEnd = (const char*)srcPtr + readSize; + if (readSize == 0 || ferror(f_in)) { + printf("Decompress: not enough input or error reading file\n"); + return 1; + } + + /* Decompress: + * Continue while there is more input to read (srcPtr != srcEnd) + * and the frame isn't over (ret != 0) + */ + while (srcPtr < srcEnd && ret != 0) { + /* Any data within dst has been flushed at this stage */ + size_t dstSize = dstCapacity; + size_t srcSize = (const char*)srcEnd - (const char*)srcPtr; + ret = LZ4F_decompress(dctx, dst, &dstSize, srcPtr, &srcSize, /* LZ4F_decompressOptions_t */ NULL); + if (LZ4F_isError(ret)) { + printf("Decompression error: %s\n", LZ4F_getErrorName(ret)); + return 1; + } + /* Flush output */ + if (dstSize != 0) safe_fwrite(dst, 1, dstSize, f_out); + /* Update input */ + srcPtr = (const char*)srcPtr + srcSize; + } + + assert(srcPtr <= srcEnd); + + /* Ensure all input data has been consumed. + * It is valid to have multiple frames in the same file, + * but this example only supports one frame. + */ + if (srcPtr < srcEnd) { + printf("Decompress: Trailing data left in file after frame\n"); + return 1; + } + } + + /* Check that there isn't trailing data in the file after the frame. + * It is valid to have multiple frames in the same file, + * but this example only supports one frame. 
+ */ + { size_t const readSize = fread(src, 1, 1, f_in); + if (readSize != 0 || !feof(f_in)) { + printf("Decompress: Trailing data left in file after frame\n"); + return 1; + } } + + return 0; +} + + +/* @return : 1==error, 0==completed */ +static int +decompress_file_allocDst(FILE* f_in, FILE* f_out, + LZ4F_dctx* dctx, + void* src, size_t srcCapacity) +{ + assert(f_in != NULL); assert(f_out != NULL); + assert(dctx != NULL); + assert(src != NULL); + assert(srcCapacity >= LZ4F_HEADER_SIZE_MAX); /* ensure LZ4F_getFrameInfo() can read enough data */ + + /* Read Frame header */ + size_t const readSize = fread(src, 1, srcCapacity, f_in); + if (readSize == 0 || ferror(f_in)) { + printf("Decompress: not enough input or error reading file\n"); + return 1; + } + + LZ4F_frameInfo_t info; + size_t consumedSize = readSize; + { size_t const fires = LZ4F_getFrameInfo(dctx, &info, src, &consumedSize); + if (LZ4F_isError(fires)) { + printf("LZ4F_getFrameInfo error: %s\n", LZ4F_getErrorName(fires)); + return 1; + } } + + /* Allocating enough space for an entire block isn't necessary for + * correctness, but it allows some memcpy's to be elided. 
+ */ + size_t const dstCapacity = get_block_size(&info); + void* const dst = malloc(dstCapacity); + if (!dst) { perror("decompress_file(dst)"); return 1; } + + int const decompressionResult = decompress_file_internal( + f_in, f_out, + dctx, + src, srcCapacity, readSize-consumedSize, consumedSize, + dst, dstCapacity); + + free(dst); + return decompressionResult; +} + + +/* @result : 1==error, 0==success */ +static int decompress_file(FILE* f_in, FILE* f_out) +{ + assert(f_in != NULL); assert(f_out != NULL); + + /* Ressource allocation */ + void* const src = malloc(IN_CHUNK_SIZE); + if (!src) { perror("decompress_file(src)"); return 1; } + + LZ4F_dctx* dctx; + { size_t const dctxStatus = LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION); + if (LZ4F_isError(dctxStatus)) { + printf("LZ4F_dctx creation error: %s\n", LZ4F_getErrorName(dctxStatus)); + } } + + int const result = !dctx ? 1 /* error */ : + decompress_file_allocDst(f_in, f_out, dctx, src, IN_CHUNK_SIZE); + + free(src); + LZ4F_freeDecompressionContext(dctx); /* note : free works on NULL */ + return result; +} + + +int compareFiles(FILE* fp0, FILE* fp1) +{ + int result = 0; + + while (result==0) { + char b0[1024]; + char b1[1024]; + size_t const r0 = fread(b0, 1, sizeof(b0), fp0); + size_t const r1 = fread(b1, 1, sizeof(b1), fp1); + + result = (r0 != r1); + if (!r0 || !r1) break; + if (!result) result = memcmp(b0, b1, r0); + } + + return result; +} + + +int main(int argc, const char **argv) { + char inpFilename[256] = { 0 }; + char lz4Filename[256] = { 0 }; + char decFilename[256] = { 0 }; + + if (argc < 2) { + printf("Please specify input filename\n"); + return 0; + } + + snprintf(inpFilename, 256, "%s", argv[1]); + snprintf(lz4Filename, 256, "%s.lz4", argv[1]); + snprintf(decFilename, 256, "%s.lz4.dec", argv[1]); + + printf("inp = [%s]\n", inpFilename); + printf("lz4 = [%s]\n", lz4Filename); + printf("dec = [%s]\n", decFilename); + + /* compress */ + { FILE* const inpFp = fopen(inpFilename, "rb"); + FILE* 
const outFp = fopen(lz4Filename, "wb"); + + printf("compress : %s -> %s\n", inpFilename, lz4Filename); + compressResult_t const ret = compress_file(inpFp, outFp); + + fclose(outFp); + fclose(inpFp); + + if (ret.error) { + printf("compress : failed with code %i\n", ret.error); + return ret.error; + } + printf("%s: %zu → %zu bytes, %.1f%%\n", + inpFilename, + (size_t)ret.size_in, (size_t)ret.size_out, /* might overflow is size_t is 32 bits and size_{in,out} > 4 GB */ + (double)ret.size_out / ret.size_in * 100); + printf("compress : done\n"); + } + + /* decompress */ + { FILE* const inpFp = fopen(lz4Filename, "rb"); + FILE* const outFp = fopen(decFilename, "wb"); + + printf("decompress : %s -> %s\n", lz4Filename, decFilename); + int const ret = decompress_file(inpFp, outFp); + + fclose(outFp); + fclose(inpFp); + + if (ret) { + printf("decompress : failed with code %i\n", ret); + return ret; + } + printf("decompress : done\n"); + } + + /* verify */ + { FILE* const inpFp = fopen(inpFilename, "rb"); + FILE* const decFp = fopen(decFilename, "rb"); + + printf("verify : %s <-> %s\n", inpFilename, decFilename); + int const cmp = compareFiles(inpFp, decFp); + + fclose(decFp); + fclose(inpFp); + + if (cmp) { + printf("corruption detected : decompressed file differs from original\n"); + return cmp; + } + printf("verify : OK\n"); + } + + return 0; +} diff --git a/examples/printVersion.c b/examples/printVersion.c new file mode 100644 index 0000000..7af318a --- /dev/null +++ b/examples/printVersion.c @@ -0,0 +1,13 @@ +// LZ4 trivial example : print Library version number +// by Takayuki Matsuoka + + +#include +#include "lz4.h" + +int main(int argc, char** argv) +{ + (void)argc; (void)argv; + printf("Hello World ! 
LZ4 Library version = %d\n", LZ4_versionNumber()); + return 0; +} diff --git a/examples/simple_buffer.c b/examples/simple_buffer.c new file mode 100644 index 0000000..6afc62a --- /dev/null +++ b/examples/simple_buffer.c @@ -0,0 +1,99 @@ +/* + * simple_buffer.c + * Copyright : Kyle Harper + * License : Follows same licensing as the lz4.c/lz4.h program at any given time. Currently, BSD 2. + * Description: Example program to demonstrate the basic usage of the compress/decompress functions within lz4.c/lz4.h. + * The functions you'll likely want are LZ4_compress_default and LZ4_decompress_safe. + * Both of these are documented in the lz4.h header file; I recommend reading them. + */ + +/* Dependencies */ +#include // For printf() +#include // For memcmp() +#include // For exit() +#include "lz4.h" // This is all that is required to expose the prototypes for basic compression and decompression. + +/* + * Simple show-error-and-bail function. + */ +void run_screaming(const char* message, const int code) { + printf("%s \n", message); + exit(code); +} + + +/* + * main + */ +int main(void) { + /* Introduction */ + // Below we will have a Compression and Decompression section to demonstrate. + // There are a few important notes before we start: + // 1) The return codes of LZ4_ functions are important. + // Read lz4.h if you're unsure what a given code means. + // 2) LZ4 uses char* pointers in all LZ4_ functions. + // This is baked into the API and not going to change, for consistency. + // If your program uses different pointer types, + // you may need to do some casting or set the right -Wno compiler flags to ignore those warnings (e.g.: -Wno-pointer-sign). + + /* Compression */ + // We'll store some text into a variable pointed to by *src to be compressed later. + const char* const src = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Lorem ipsum dolor site amat."; + // The compression function needs to know how many bytes exist. 
Since we're using a string, we can use strlen() + 1 (for \0). + const int src_size = (int)(strlen(src) + 1); + // LZ4 provides a function that will tell you the maximum size of compressed output based on input data via LZ4_compressBound(). + const int max_dst_size = LZ4_compressBound(src_size); + // We will use that size for our destination boundary when allocating space. + char* compressed_data = malloc((size_t)max_dst_size); + if (compressed_data == NULL) + run_screaming("Failed to allocate memory for *compressed_data.", 1); + // That's all the information and preparation LZ4 needs to compress *src into *compressed_data. + // Invoke LZ4_compress_default now with our size values and pointers to our memory locations. + // Save the return value for error checking. + const int compressed_data_size = LZ4_compress_default(src, compressed_data, src_size, max_dst_size); + // Check return_value to determine what happened. + if (compressed_data_size <= 0) + run_screaming("A 0 or negative result from LZ4_compress_default() indicates a failure trying to compress the data. ", 1); + if (compressed_data_size > 0) + printf("We successfully compressed some data! Ratio: %.2f\n", + (float) compressed_data_size/src_size); + // Not only does a positive return_value mean success, the value returned == the number of bytes required. + // You can use this to realloc() *compress_data to free up memory, if desired. We'll do so just to demonstrate the concept. + compressed_data = (char *)realloc(compressed_data, (size_t)compressed_data_size); + if (compressed_data == NULL) + run_screaming("Failed to re-alloc memory for compressed_data. Sad :(", 1); + + + /* Decompression */ + // Now that we've successfully compressed the information from *src to *compressed_data, let's do the opposite! + // The decompression will need to know the compressed size, and an upper bound of the decompressed size. 
+ // In this example, we just re-use this information from previous section, + // but in a real-world scenario, metadata must be transmitted to the decompression side. + // Each implementation is in charge of this part. Oftentimes, it adds some header of its own. + // Sometimes, the metadata can be extracted from the local context. + + // First, let's create a *new_src location of size src_size since we know that value. + char* const regen_buffer = malloc(src_size); + if (regen_buffer == NULL) + run_screaming("Failed to allocate memory for *regen_buffer.", 1); + // The LZ4_decompress_safe function needs to know where the compressed data is, how many bytes long it is, + // where the regen_buffer memory location is, and how large regen_buffer (uncompressed) output will be. + // Again, save the return_value. + const int decompressed_size = LZ4_decompress_safe(compressed_data, regen_buffer, compressed_data_size, src_size); + free(compressed_data); /* no longer useful */ + if (decompressed_size < 0) + run_screaming("A negative result from LZ4_decompress_safe indicates a failure trying to decompress the data. See exit code (echo $?) for value returned.", decompressed_size); + if (decompressed_size >= 0) + printf("We successfully decompressed some data!\n"); + // Not only does a positive return value mean success, + // value returned == number of bytes regenerated from compressed_data stream. + if (decompressed_size != src_size) + run_screaming("Decompressed data is different from original! \n", 1); + + /* Validation */ + // We should be able to compare our original *src with our *new_src and be byte-for-byte identical. + if (memcmp(src, regen_buffer, src_size) != 0) + run_screaming("Validation failed. *src and *new_src are not identical.", 1); + printf("Validation done. 
The string we ended up with is:\n%s\n", regen_buffer); + return 0; +} diff --git a/examples/streaming_api_basics.md b/examples/streaming_api_basics.md new file mode 100644 index 0000000..1ccc6e3 --- /dev/null +++ b/examples/streaming_api_basics.md @@ -0,0 +1,87 @@ +# LZ4 Streaming API Basics +by *Takayuki Matsuoka* +## LZ4 API sets + +LZ4 has the following API sets : + + - "Auto Framing" API (lz4frame.h) : + This is most recommended API for usual application. + It guarantees interoperability with other LZ4 framing format compliant tools/libraries + such as LZ4 command line utility, node-lz4, etc. + - "Block" API : This is recommended for simple purpose. + It compress single raw memory block to LZ4 memory block and vice versa. + - "Streaming" API : This is designed for complex things. + For example, compress huge stream data in restricted memory environment. + +Basically, you should use "Auto Framing" API. +But if you want to write advanced application, it's time to use Block or Streaming APIs. + + +## What is difference between Block and Streaming API ? + +Block API (de)compresses a single contiguous memory block. +In other words, LZ4 library finds redundancy from a single contiguous memory block. +Streaming API does same thing but (de)compresses multiple adjacent contiguous memory blocks. +So LZ4 library could find more redundancy than Block API. + +The following figure shows difference between API and block sizes. +In these figures, the original data is split into 4KiBytes contiguous chunks. + +``` +Original Data + +---------------+---------------+----+----+----+ + | 4KiB Chunk A | 4KiB Chunk B | C | D |... | + +---------------+---------------+----+----+----+ + +Example (1) : Block API, 4KiB Block + +---------------+---------------+----+----+----+ + | 4KiB Chunk A | 4KiB Chunk B | C | D |... | + +---------------+---------------+----+----+----+ + | Block #1 | Block #2 | #3 | #4 |... 
| + +---------------+---------------+----+----+----+ + + (No Dependency) + + +Example (2) : Block API, 8KiB Block + +---------------+---------------+----+----+----+ + | 4KiB Chunk A | 4KiB Chunk B | C | D |... | + +---------------+---------------+----+----+----+ + | Block #1 |Block #2 |... | + +--------------------+----------+-------+-+----+ + ^ | ^ | + | | | | + +--------------+ +----+ + Internal Dependency Internal Dependency + + +Example (3) : Streaming API, 4KiB Block + +---------------+---------------+-----+----+----+ + | 4KiB Chunk A | 4KiB Chunk B | C | D |... | + +---------------+---------------+-----+----+----+ + | Block #1 | Block #2 | #3 | #4 |... | + +---------------+----+----------+-+---+-+--+----+ + ^ | ^ | ^ | + | | | | | | + +--------------+ +--------+ +---+ + Dependency Dependency Dependency +``` + + - In example (1), there is no dependency. + All blocks are compressed independently. + - In example (2), naturally 8KiBytes block has internal dependency. + But still block #1 and #2 are compressed independently. + - In example (3), block #2 has dependency to #1, + also #3 has dependency to #2 and #1, #4 has #3, #2 and #1, and so on. + +Here, we can observe difference between example (2) and (3). +In (2), there's no dependency between chunk B and C, but (3) has dependency between B and C. +This dependency improves compression ratio. + + +## Restriction of Streaming API + +For efficiency, Streaming API doesn't keep a mirror copy of dependent (de)compressed memory. +This means users should keep these dependent (de)compressed memory explicitly. +Usually, "Dependent memory" is previous adjacent contiguous memory up to 64KiBytes. +LZ4 will not access further memories. diff --git a/lib/Makefile b/lib/Makefile index 8f21d3d..c12949b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -28,7 +28,7 @@ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# # You can contact the author at : -# - LZ4 source repository : https://github.com/Cyan4973/lz4 +# - LZ4 source repository : https://github.com/lz4/lz4 # - LZ4 forum froup : https://groups.google.com/forum/#!forum/lz4c # ################################################################ diff --git a/lib/README.md b/lib/README.md index cba2c34..e2af868 100644 --- a/lib/README.md +++ b/lib/README.md @@ -35,21 +35,22 @@ So it's necessary to include all `*.c` and `*.h` files present in `/lib`. Definitions which are not guaranteed to remain stable in future versions, are protected behind macros, such as `LZ4_STATIC_LINKING_ONLY`. -As the name implies, these definitions can only be invoked +As the name strongly implies, these definitions should only be invoked in the context of static linking ***only***. Otherwise, dependent application may fail on API or ABI break in the future. -The associated symbols are also not present in dynamic library by default. +The associated symbols are also not exposed by the dynamic library by default. Should they be nonetheless needed, it's possible to force their publication -by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`. +by using build macros `LZ4_PUBLISH_STATIC_FUNCTIONS` +and `LZ4F_PUBLISH_STATIC_FUNCTIONS`. #### Build macros -The following build macro can be selected at compilation time : +The following build macro can be selected to adjust source code behavior at compilation time : -- `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop. - This loops works great on x86/x64 cpus, and is automatically enabled on this platform. - It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor. +- `LZ4_FAST_DEC_LOOP` : this triggers a speed optimized decompression loop, more powerful on modern cpus. + This loop works great on `x86`, `x64` and `aarch64` cpus, and is automatically enabled for them. 
+ It's also possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor. For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`, and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`. @@ -65,8 +66,24 @@ The following build macro can be selected at compilation time : Should this be a problem, it's generally possible to make the compiler ignore these warnings, for example with `-Wno-deprecated-declarations` on `gcc`, or `_CRT_SECURE_NO_WARNINGS` for Visual Studio. - Another method is to define `LZ4_DISABLE_DEPRECATE_WARNINGS` - before including the LZ4 header files. + This build macro offers another project-specific method + by defining `LZ4_DISABLE_DEPRECATE_WARNINGS` before including the LZ4 header files. + +- `LZ4_USER_MEMORY_FUNCTIONS` : replace calls to the standard library's `malloc`, `calloc` and `free` + by user-defined functions, which must be called `LZ4_malloc()`, `LZ4_calloc()` and `LZ4_free()`. + User functions must be available at link time. + +- `LZ4_FORCE_SW_BITCOUNT` : by default, the compression algorithm tries to determine lengths + by using bitcount instructions, generally implemented as fast single instructions in many cpus. + In case the target cpu doesn't support them, or the compiler intrinsics don't work or perform poorly, + it's possible to use an optimized software path instead. + This is achieved by setting this build macro. + In most cases, it's not expected to be necessary, + but it can be legitimately considered for less common platforms. + +- `LZ4_ALIGN_TEST` : alignment test ensures that the memory area + passed as argument to become a compression state is suitably aligned. + This test can be disabled if it proves flaky, by setting this value to 0. #### Amalgamation @@ -102,7 +119,7 @@ The compiled executable will require LZ4 DLL which is available at `dll\liblz4.d #### Miscellaneous -Other files present in the directory are not source code. 
There are : +Other files present in the directory are not source code. They are : - `LICENSE` : contains the BSD license text - `Makefile` : `make` script to compile and install lz4 library (static and dynamic) diff --git a/lib/lz4.c b/lib/lz4.c index 805388d..9f5e9bf 100644 --- a/lib/lz4.c +++ b/lib/lz4.c @@ -45,10 +45,16 @@ #endif /* - * ACCELERATION_DEFAULT : + * LZ4_ACCELERATION_DEFAULT : * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0 */ -#define ACCELERATION_DEFAULT 1 +#define LZ4_ACCELERATION_DEFAULT 1 +/* + * LZ4_ACCELERATION_MAX : + * Any "acceleration" value higher than this threshold + * get treated as LZ4_ACCELERATION_MAX instead (fix #876) + */ +#define LZ4_ACCELERATION_MAX 65537 /*-************************************ @@ -82,6 +88,7 @@ * Define this parameter if your target system or compiler does not support hardware bit count */ #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */ +# undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */ # define LZ4_FORCE_SW_BITCOUNT #endif @@ -114,10 +121,9 @@ /*-************************************ * Compiler Options **************************************/ -#ifdef _MSC_VER /* Visual Studio */ -# include -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -# pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */ +# include /* only present in VS2005+ */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ #endif /* _MSC_VER */ #ifndef LZ4_FORCE_INLINE @@ -136,7 +142,7 @@ # endif /* _MSC_VER */ #endif /* LZ4_FORCE_INLINE */ -/* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE +/* LZ4_FORCE_O2 and LZ4_FORCE_INLINE * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8, * together with a simple 8-byte copy loop as a fall-back path. 
* However, this optimization hurts the decompression speed by >30%, @@ -151,11 +157,11 @@ * of LZ4_wildCopy8 does not affect the compression speed. */ #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__) -# define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2"))) -# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE +# define LZ4_FORCE_O2 __attribute__((optimize("O2"))) +# undef LZ4_FORCE_INLINE +# define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline)) #else -# define LZ4_FORCE_O2_GCC_PPC64LE -# define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static +# define LZ4_FORCE_O2 #endif #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__) @@ -171,14 +177,33 @@ #define unlikely(expr) expect((expr) != 0, 0) #endif +/* Should the alignment test prove unreliable, for some reason, + * it can be disabled by setting LZ4_ALIGN_TEST to 0 */ +#ifndef LZ4_ALIGN_TEST /* can be externally provided */ +# define LZ4_ALIGN_TEST 1 +#endif + /*-************************************ * Memory routines **************************************/ -#include /* malloc, calloc, free */ -#define ALLOC(s) malloc(s) -#define ALLOC_AND_ZERO(s) calloc(1,s) -#define FREEMEM(p) free(p) +#ifdef LZ4_USER_MEMORY_FUNCTIONS +/* memory management functions can be customized by user project. 
+ * Below functions must exist somewhere in the Project + * and be available at link time */ +void* LZ4_malloc(size_t s); +void* LZ4_calloc(size_t n, size_t s); +void LZ4_free(void* p); +# define ALLOC(s) LZ4_malloc(s) +# define ALLOC_AND_ZERO(s) LZ4_calloc(1,s) +# define FREEMEM(p) LZ4_free(p) +#else +# include /* malloc, calloc, free */ +# define ALLOC(s) malloc(s) +# define ALLOC_AND_ZERO(s) calloc(1,s) +# define FREEMEM(p) free(p) +#endif + #include /* memset, memcpy */ #define MEM_INIT(p,v,s) memset((p),(v),(s)) @@ -225,21 +250,27 @@ static const int LZ4_minLength = (MFLIMIT+1); #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2) # include -static int g_debuglog_enable = 1; -# define DEBUGLOG(l, ...) { \ - if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ - fprintf(stderr, __FILE__ ": "); \ - fprintf(stderr, __VA_ARGS__); \ - fprintf(stderr, " \n"); \ - } } + static int g_debuglog_enable = 1; +# define DEBUGLOG(l, ...) { \ + if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \ + fprintf(stderr, __FILE__ ": "); \ + fprintf(stderr, __VA_ARGS__); \ + fprintf(stderr, " \n"); \ + } } #else -# define DEBUGLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) 
{} /* disabled */ #endif +static int LZ4_isAligned(const void* ptr, size_t alignment) +{ + return ((size_t)ptr & (alignment -1)) == 0; +} + /*-************************************ * Types **************************************/ +#include #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) # include typedef uint8_t BYTE; @@ -249,6 +280,9 @@ static int g_debuglog_enable = 1; typedef uint64_t U64; typedef uintptr_t uptrval; #else +# if UINT_MAX != 4294967295UL +# error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4" +# endif typedef unsigned char BYTE; typedef unsigned short U16; typedef unsigned int U32; @@ -273,6 +307,21 @@ typedef enum { /*-************************************ * Reading and writing into memory **************************************/ + +/** + * LZ4 relies on memcpy with a constant size being inlined. In freestanding + * environments, the compiler can't assume the implementation of memcpy() is + * standard compliant, so it can't apply its specialized memcpy() inlining + * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze + * memcpy() as if it were standard compliant, so it can inline it in freestanding + * environments. This is needed when decompressing the Linux Kernel, for example. 
+ */ +#if defined(__GNUC__) && (__GNUC__ >= 4) +#define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size) +#else +#define LZ4_memcpy(dst, src, size) memcpy(dst, src, size) +#endif + static unsigned LZ4_isLittleEndian(void) { const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */ @@ -307,27 +356,27 @@ static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = val static U16 LZ4_read16(const void* memPtr) { - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; + U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } static U32 LZ4_read32(const void* memPtr) { - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; + U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } static reg_t LZ4_read_ARCH(const void* memPtr) { - reg_t val; memcpy(&val, memPtr, sizeof(val)); return val; + reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val; } static void LZ4_write16(void* memPtr, U16 value) { - memcpy(memPtr, &value, sizeof(value)); + LZ4_memcpy(memPtr, &value, sizeof(value)); } static void LZ4_write32(void* memPtr, U32 value) { - memcpy(memPtr, &value, sizeof(value)); + LZ4_memcpy(memPtr, &value, sizeof(value)); } #endif /* LZ4_FORCE_MEMORY_ACCESS */ @@ -355,14 +404,14 @@ static void LZ4_writeLE16(void* memPtr, U16 value) } /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE +LZ4_FORCE_INLINE void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; const BYTE* s = (const BYTE*)srcPtr; BYTE* const e = (BYTE*)dstEnd; - do { memcpy(d,s,8); d+=8; s+=8; } while (d = 16. 
*/ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd) { BYTE* d = (BYTE*)dstPtr; const BYTE* s = (const BYTE*)srcPtr; BYTE* const e = (BYTE*)dstEnd; - do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d = dstPtr + MINMATCH * - there is at least 8 bytes available to write after dstEnd */ -LZ4_FORCE_O2_INLINE_GCC_PPC64LE void +LZ4_FORCE_INLINE void LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset) { BYTE v[8]; assert(dstEnd >= dstPtr + MINMATCH); - LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */ switch(offset) { case 1: - memset(v, *srcPtr, 8); + MEM_INIT(v, *srcPtr, 8); break; case 2: - memcpy(v, srcPtr, 2); - memcpy(&v[2], srcPtr, 2); - memcpy(&v[4], &v[0], 4); + LZ4_memcpy(v, srcPtr, 2); + LZ4_memcpy(&v[2], srcPtr, 2); + LZ4_memcpy(&v[4], v, 4); break; case 4: - memcpy(v, srcPtr, 4); - memcpy(&v[4], srcPtr, 4); + LZ4_memcpy(v, srcPtr, 4); + LZ4_memcpy(&v[4], srcPtr, 4); break; default: LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset); return; } - memcpy(dstPtr, v, 8); + LZ4_memcpy(dstPtr, v, 8); dstPtr += 8; while (dstPtr < dstEnd) { - memcpy(dstPtr, v, 8); + LZ4_memcpy(dstPtr, v, 8); dstPtr += 8; } } @@ -462,75 +512,92 @@ LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const si **************************************/ static unsigned LZ4_NbCommonBytes (reg_t val) { + assert(val != 0); if (LZ4_isLittleEndian()) { - if (sizeof(val)==8) { -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) + if (sizeof(val) == 8) { +# if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT) + /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */ + return (unsigned)_tzcnt_u64(val) >> 3; +# elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r = 0; - _BitScanForward64( &r, (U64)val 
); - return (int)(r>>3); -# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) + _BitScanForward64(&r, (U64)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_ctzll((U64)val) >> 3; # else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, - 0, 3, 1, 3, 1, 4, 2, 7, - 0, 2, 3, 6, 1, 5, 3, 5, - 1, 3, 4, 4, 2, 5, 6, 7, - 7, 0, 1, 2, 3, 3, 4, 6, - 2, 6, 5, 5, 3, 4, 5, 6, - 7, 1, 2, 4, 6, 4, 4, 5, - 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; + const U64 m = 0x0101010101010101ULL; + val ^= val - 1; + return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56); # endif } else /* 32 bits */ { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) +# if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT) unsigned long r; - _BitScanForward( &r, (U32)val ); - return (int)(r>>3); -# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) + _BitScanForward(&r, (U32)val); + return (unsigned)r >> 3; +# elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_ctz((U32)val) >> 3; # else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, - 3, 2, 2, 1, 3, 2, 0, 1, - 3, 3, 1, 2, 2, 2, 2, 0, - 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; + const U32 m = 0x01010101; + return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24; # endif } } else /* Big Endian CPU */ { - if (sizeof(val)==8) { /* 64-bits */ -# if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; 
- _BitScanReverse64( &r, val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) + if (sizeof(val)==8) { +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_clzll((U64)val) >> 3; # else +#if 1 + /* this method is probably faster, + * but adds a 128 bytes lookup table */ + static const unsigned char ctz7_tab[128] = { + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + }; + U64 const mask = 0x0101010101010101ULL; + U64 const t = (((val >> 8) - mask) | val) & mask; + return ctz7_tab[(t * 0x0080402010080402ULL) >> 57]; +#else + /* this method doesn't consume memory space like the previous one, + * but it contains several branches, + * that may end up slowing execution */ static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits. - Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. - Note that this code path is never triggered in 32-bits mode. */ + Just to avoid some static analyzer complaining about shift by 32 on 32-bits target. + Note that this code path is never triggered in 32-bits mode. 
*/ unsigned r; if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; } if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } r += (!val); return r; +#endif # endif } else /* 32 bits */ { -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, (unsigned long)val ); - return (unsigned)(r>>3); -# elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT) +# if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \ + ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \ + !defined(LZ4_FORCE_SW_BITCOUNT) return (unsigned)__builtin_clz((U32)val) >> 3; # else - unsigned r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; + val >>= 8; + val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) | + (val + 0x00FF0000)) >> 24; + return (unsigned)val ^ 3; # endif } } } + #define STEPSIZE sizeof(reg_t) LZ4_FORCE_INLINE unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit) @@ -605,7 +672,7 @@ typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive; int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; } const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; } int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } -int LZ4_sizeofState() { return LZ4_STREAMSIZE; } +int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; } /*-************************************ @@ -628,7 +695,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, /*-****************************** * Compression functions ********************************/ -static U32 LZ4_hash4(U32 sequence, tableType_t const tableType) +LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType) { if (tableType == byU16) return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); @@ -636,7 +703,7 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType) return ((sequence * 2654435761U) >> 
((MINMATCH*8)-LZ4_HASHLOG)); } -static U32 LZ4_hash5(U64 sequence, tableType_t const tableType) +LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType) { const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG; if (LZ4_isLittleEndian()) { @@ -654,7 +721,7 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab return LZ4_hash4(LZ4_read32(p), tableType); } -static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) +LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) { switch (tableType) { @@ -666,7 +733,7 @@ static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType) } } -static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) +LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType) { switch (tableType) { @@ -678,7 +745,7 @@ static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t cons } } -static void LZ4_putPositionOnHash(const BYTE* p, U32 h, +LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t const tableType, const BYTE* srcBase) { @@ -703,7 +770,7 @@ LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_ * Assumption 1 : only valid if tableType == byU32 or byU16. * Assumption 2 : h is presumed valid (within limits of hash table) */ -static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) +LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType) { LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2); if (tableType == byU32) { @@ -739,22 +806,13 @@ LZ4_FORCE_INLINE void LZ4_prepareTable(LZ4_stream_t_internal* const cctx, const int inputSize, const tableType_t tableType) { - /* If compression failed during the previous step, then the context - * is marked as dirty, therefore, it has to be fully reset. 
- */ - if (cctx->dirty) { - DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx); - MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal)); - return; - } - /* If the table hasn't been used, it's guaranteed to be zeroed out, and is * therefore safe to use no matter what mode we're in. Otherwise, we figure * out if it's safe to leave as is or whether it needs to be reset. */ - if (cctx->tableType != clearedTable) { + if ((tableType_t)cctx->tableType != clearedTable) { assert(inputSize >= 0); - if (cctx->tableType != tableType + if ((tableType_t)cctx->tableType != tableType || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU) || ((tableType == byU32) && cctx->currentOffset > 1 GB) || tableType == byPtr @@ -763,7 +821,7 @@ LZ4_prepareTable(LZ4_stream_t_internal* const cctx, DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx); MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE); cctx->currentOffset = 0; - cctx->tableType = clearedTable; + cctx->tableType = (U32)clearedTable; } else { DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)"); } @@ -785,8 +843,12 @@ LZ4_prepareTable(LZ4_stream_t_internal* const cctx, } /** LZ4_compress_generic() : - inlined, to ensure branches are decided at compilation time */ -LZ4_FORCE_INLINE int LZ4_compress_generic( + * inlined, to ensure branches are decided at compilation time. + * Presumed already validated at this stage: + * - source != NULL + * - inputSize > 0 + */ +LZ4_FORCE_INLINE int LZ4_compress_generic_validated( LZ4_stream_t_internal* const cctx, const char* const source, char* const dest, @@ -815,7 +877,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx); U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */ - const BYTE* const dictEnd = dictionary + dictSize; + const BYTE* const dictEnd = dictionary ? 
dictionary + dictSize : dictionary; const BYTE* anchor = (const BYTE*) source; const BYTE* const iend = ip + inputSize; const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1; @@ -823,7 +885,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( /* the dictCtx currentOffset is indexed on the start of the dictionary, * while a dictionary in the current context precedes the currentOffset */ - const BYTE* dictBase = (dictDirective == usingDictCtx) ? + const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ? dictionary + dictSize - dictCtx->currentOffset : dictionary + dictSize - startIndex; @@ -833,11 +895,11 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( U32 offset = 0; U32 forwardH; - DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType); + DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType); + assert(ip != NULL); /* If init conditions are not met, we don't have to mark stream * as having dirty context, since no action was taken yet */ if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */ - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported inputSize, too large (or negative) */ if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */ if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */ assert(acceleration >= 1); @@ -854,7 +916,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic( cctx->dictSize += (U32)inputSize; } cctx->currentOffset += (U32)inputSize; - cctx->tableType = (U16)tableType; + cctx->tableType = (U32)tableType; if (inputSize = op); - lastRun = (size_t)(olimit-op) - 1; - lastRun -= (lastRun+240)/255; + lastRun = (size_t)(olimit-op) - 1/*token*/; + lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/ } else { assert(outputDirective == limitedOutput); return 0; /* cannot compress within 
`dst` budget. Stored indexes in hash table are nonetheless fine */ } } + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun); if (lastRun >= RUN_MASK) { size_t accumulator = lastRun - RUN_MASK; *op++ = RUN_MASK << ML_BITS; @@ -1162,7 +1225,7 @@ _last_literals: } else { *op++ = (BYTE)(lastRun< 0); + DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result); return result; } +/** LZ4_compress_generic() : + * inlined, to ensure branches are decided at compilation time; + * takes care of src == (NULL, 0) + * and forward the rest to LZ4_compress_generic_validated */ +LZ4_FORCE_INLINE int LZ4_compress_generic( + LZ4_stream_t_internal* const cctx, + const char* const src, + char* const dst, + const int srcSize, + int *inputConsumed, /* only written when outputDirective == fillOutput */ + const int dstCapacity, + const limitedOutput_directive outputDirective, + const tableType_t tableType, + const dict_directive dictDirective, + const dictIssue_directive dictIssue, + const int acceleration) +{ + DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i", + srcSize, dstCapacity); + + if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */ + if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */ + if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */ + DEBUGLOG(5, "Generating an empty block"); + assert(outputDirective == notLimited || dstCapacity >= 1); + assert(dst != NULL); + dst[0] = 0; + if (outputDirective == fillOutput) { + assert (inputConsumed != NULL); + *inputConsumed = 0; + } + return 1; + } + assert(src != NULL); + + return LZ4_compress_generic_validated(cctx, src, dst, srcSize, + inputConsumed, /* only written into if outputDirective == fillOutput */ + dstCapacity, outputDirective, + tableType, dictDirective, dictIssue, acceleration); +} + int LZ4_compress_fast_extState(void* state, const char* source, char* 
dest, int inputSize, int maxOutputSize, int acceleration) { LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse; assert(ctx != NULL); - if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; if (maxOutputSize >= LZ4_compressBound(inputSize)) { if (inputSize < LZ4_64Klimit) { return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration); @@ -1211,7 +1316,8 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) { LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse; - if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; if (dstCapacity >= LZ4_compressBound(srcSize)) { if (srcSize < LZ4_64Klimit) { @@ -1270,22 +1376,6 @@ int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputS } -/* hidden debug function */ -/* strangely enough, gcc generates faster code when this function is uncommented, even if unused */ -int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration) -{ - LZ4_stream_t ctx; - LZ4_initStream(&ctx, sizeof(ctx)); - - if (srcSize < LZ4_64Klimit) { - return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration); - } else { - tableType_t const addrMode = (sizeof(void*) > 4) ? 
byU32 : byPtr; - return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration); - } -} - - /* Note!: This function leaves the stream in an unclean/broken state! * It is not safe to subsequently use the same state with a _fastReset() or * _continue() call without resetting it. */ @@ -1340,27 +1430,23 @@ LZ4_stream_t* LZ4_createStream(void) return lz4s; } -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - it reports an aligment of 8-bytes, - while actually aligning LZ4_stream_t on 4 bytes. */ static size_t LZ4_stream_t_alignment(void) { - struct { char c; LZ4_stream_t t; } t_a; - return sizeof(t_a) - sizeof(t_a.t); -} +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_stream_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_stream_t); +#else + return 1; /* effectively disabled */ #endif +} LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) { DEBUGLOG(5, "LZ4_initStream"); if (buffer == NULL) { return NULL; } if (size < sizeof(LZ4_stream_t)) { return NULL; } -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - it reports an aligment of 8-bytes, - while actually aligning LZ4_stream_t on 4 bytes. 
*/ - if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */ -#endif - MEM_INIT(buffer, 0, sizeof(LZ4_stream_t)); + if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL; + MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal)); return (LZ4_stream_t*)buffer; } @@ -1369,7 +1455,7 @@ LZ4_stream_t* LZ4_initStream (void* buffer, size_t size) void LZ4_resetStream (LZ4_stream_t* LZ4_stream) { DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream); - MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t)); + MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal)); } void LZ4_resetStream_fast(LZ4_stream_t* ctx) { @@ -1418,7 +1504,7 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize) base = dictEnd - dict->currentOffset; dict->dictionary = p; dict->dictSize = (U32)(dictEnd - p); - dict->tableType = tableType; + dict->tableType = (U32)tableType; while (p <= dictEnd-HASH_UNIT) { LZ4_putPosition(p, dict->hashTable, tableType, base); @@ -1436,12 +1522,6 @@ void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dict workingStream, dictionaryStream, dictCtx != NULL ? dictCtx->dictSize : 0); - /* Calling LZ4_resetStream_fast() here makes sure that changes will not be - * erased by subsequent calls to LZ4_resetStream_fast() in case stream was - * marked as having dirty context, e.g. requiring full reset. 
- */ - LZ4_resetStream_fast(workingStream); - if (dictCtx != NULL) { /* If the current offset is zero, we will never look in the * external dictionary context, since there is no value a table @@ -1493,9 +1573,9 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize); - if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */ LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */ - if (acceleration < 1) acceleration = ACCELERATION_DEFAULT; + if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT; + if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX; /* invalidate tiny dictionaries */ if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */ @@ -1538,7 +1618,7 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream, * cost to copy the dictionary's tables into the active context, * so that the compression loop is only looking into one table. */ - memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t)); + LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr)); result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration); } else { result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration); @@ -1593,7 +1673,9 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize) if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */ if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; } - memmove(safeBuffer, previousDictEnd - dictSize, dictSize); + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + memmove(safeBuffer, previousDictEnd - dictSize, dictSize); dict->dictionary = (const BYTE*)safeBuffer; dict->dictSize = (U32)dictSize; @@ -1623,25 +1705,27 @@ 
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive; */ typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error; LZ4_FORCE_INLINE unsigned -read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error) +read_variable_length(const BYTE**ip, const BYTE* lencheck, + int loop_check, int initial_check, + variable_length_error* error) { - unsigned length = 0; - unsigned s; - if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ - *error = initial_error; - return length; - } - do { - s = **ip; - (*ip)++; - length += s; - if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ - *error = loop_error; - return length; + U32 length = 0; + U32 s; + if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = initial_error; + return length; } - } while (s==255); + do { + s = **ip; + (*ip)++; + length += s; + if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */ + *error = loop_error; + return length; + } + } while (s==255); - return length; + return length; } /*! 
LZ4_decompress_generic() : @@ -1665,7 +1749,7 @@ LZ4_decompress_generic( const size_t dictSize /* note : = 0 if noDict */ ) { - if ((src == NULL) || (outputSize < 0)) { return -1; } + if (src == NULL) { return -1; } { const BYTE* ip = (const BYTE*) src; const BYTE* const iend = ip + srcSize; @@ -1722,7 +1806,7 @@ LZ4_decompress_generic( /* decode literal length */ if (length == RUN_MASK) { variable_length_error error = ok; - length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error); + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); if (error == initial_error) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ @@ -1746,12 +1830,12 @@ LZ4_decompress_generic( /* We don't need to check oend, since we check it once for each loop below */ if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; } /* Literals can only be 14, but hope compilers optimize if we copy by a register size */ - memcpy(op, ip, 16); + LZ4_memcpy(op, ip, 16); } else { /* LZ4_decompress_fast() */ /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time : * it doesn't know input length, and relies on end-of-block properties */ - memcpy(op, ip, 8); - if (length > 8) { memcpy(op+8, ip+8, 8); } + LZ4_memcpy(op, ip, 8); + if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); } } ip += length; op = cpy; } @@ -1765,10 +1849,10 @@ LZ4_decompress_generic( length = token & ML_MASK; if (length == ML_MASK) { - variable_length_error error = ok; - if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ - length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error); - if (error != ok) { goto _output_error; } + variable_length_error 
error = ok; + if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); + if (error != ok) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */ length += MINMATCH; if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) { @@ -1787,19 +1871,20 @@ LZ4_decompress_generic( assert(match <= op); assert(op + 18 <= oend); - memcpy(op, match, 8); - memcpy(op+8, match+8, 8); - memcpy(op+16, match+16, 2); + LZ4_memcpy(op, match, 8); + LZ4_memcpy(op+8, match+8, 8); + LZ4_memcpy(op+16, match+16, 2); op += length; continue; } } } - if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ + if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */ /* match starting within external dictionary */ if ((dict==usingExtDict) && (match < lowPrefix)) { if (unlikely(op+length > oend-LASTLITERALS)) { if (partialDecoding) { - length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */ + DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd"); + length = MIN(length, (size_t)(oend-op)); } else { goto _output_error; /* end-of-block condition violated */ } } @@ -1812,14 +1897,14 @@ LZ4_decompress_generic( /* match stretches into both external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) { *op++ = *copyFrom++; } } else { - memcpy(op, lowPrefix, restSize); + LZ4_memcpy(op, 
lowPrefix, restSize); op += restSize; } } continue; @@ -1860,7 +1945,7 @@ LZ4_decompress_generic( /* strictly "less than" on input, to re-enter the loop with at least one byte */ && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) { /* Copy the literals */ - memcpy(op, ip, endOnInput ? 16 : 8); + LZ4_memcpy(op, ip, endOnInput ? 16 : 8); op += length; ip += length; /* The second stage: prepare for match copying, decode full info. @@ -1875,9 +1960,9 @@ LZ4_decompress_generic( && (offset >= 8) && (dict==withPrefix64k || match >= lowPrefix) ) { /* Copy the match. */ - memcpy(op + 0, match + 0, 8); - memcpy(op + 8, match + 8, 8); - memcpy(op +16, match +16, 2); + LZ4_memcpy(op + 0, match + 0, 8); + LZ4_memcpy(op + 8, match + 8, 8); + LZ4_memcpy(op +16, match +16, 2); op += length + MINMATCH; /* Both stages worked, load the next token. */ continue; @@ -1891,7 +1976,7 @@ LZ4_decompress_generic( /* decode literal length */ if (length == RUN_MASK) { variable_length_error error = ok; - length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error); + length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error); if (error == initial_error) { goto _output_error; } if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */ if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */ @@ -1907,29 +1992,34 @@ LZ4_decompress_generic( || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) ) { /* We've either hit the input parsing restriction or the output parsing restriction. - * If we've hit the input parsing condition then this must be the last sequence. - * If we've hit the output parsing condition then we are either using partialDecoding - * or we've hit the output parsing condition. 
+ * In the normal scenario, decoding a full block, it must be the last sequence, + * otherwise it's an error (invalid input or dimensions). + * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow. */ if (partialDecoding) { /* Since we are partial decoding we may be in this block because of the output parsing * restriction, which is not valid since the output buffer is allowed to be undersized. */ assert(endOnInput); - /* If we're in this block because of the input parsing condition, then we must be on the - * last sequence (or invalid), so we must check that we exactly consume the input. + DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end") + DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length); + DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op)); + DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip)); + /* Finishing in the middle of a literals segment, + * due to lack of input. */ - if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; } - assert(ip+length <= iend); - /* We are finishing in the middle of a literals segment. - * Break after the copy. + if (ip+length > iend) { + length = (size_t)(iend-ip); + cpy = op + length; + } + /* Finishing in the middle of a literals segment, + * due to lack of output space. */ if (cpy > oend) { cpy = oend; assert(op<=oend); length = (size_t)(oend-op); } - assert(ip+length <= iend); } else { /* We must be on the last sequence because of the parsing limitations so check * that we exactly regenerate the original size (must be exact when !endOnInput). @@ -1938,16 +2028,22 @@ LZ4_decompress_generic( /* We must be on the last sequence (or invalid) because of the parsing limitations * so check that we exactly consume the input and don't overrun the output buffer. 
*/ - if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; } + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { + DEBUGLOG(6, "should have been last run of literals") + DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend); + DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend); + goto _output_error; + } } - memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */ + memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */ ip += length; op += length; - /* Necessarily EOF when !partialDecoding. When partialDecoding - * it is EOF if we've either filled the output buffer or hit - * the input parsing restriction. + /* Necessarily EOF when !partialDecoding. + * When partialDecoding, it is EOF if we've either + * filled the output buffer or + * can't proceed with reading an offset for following match. */ - if (!partialDecoding || (cpy == oend) || (ip == iend)) { + if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) { break; } } else { @@ -1965,7 +2061,7 @@ LZ4_decompress_generic( _copy_match: if (length == ML_MASK) { variable_length_error error = ok; - length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error); + length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error); if (error != ok) goto _output_error; if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */ } @@ -1990,14 +2086,14 @@ LZ4_decompress_generic( /* match stretches into both external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix - match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + LZ4_memcpy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = 
op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; } else { - memcpy(op, lowPrefix, restSize); + LZ4_memcpy(op, lowPrefix, restSize); op += restSize; } } continue; @@ -2016,7 +2112,7 @@ LZ4_decompress_generic( if (matchEnd > op) { /* overlap copy */ while (op < copyEnd) { *op++ = *match++; } } else { - memcpy(op, match, mlen); + LZ4_memcpy(op, match, mlen); } op = copyEnd; if (op == oend) { break; } @@ -2030,10 +2126,10 @@ LZ4_decompress_generic( op[2] = match[2]; op[3] = match[3]; match += inc32table[offset]; - memcpy(op+4, match, 4); + LZ4_memcpy(op+4, match, 4); match -= dec64table[offset]; } else { - memcpy(op, match, 8); + LZ4_memcpy(op, match, 8); match += 8; } op += 8; @@ -2048,7 +2144,7 @@ LZ4_decompress_generic( } while (op < cpy) { *op++ = *match++; } } else { - memcpy(op, match, 8); + LZ4_memcpy(op, match, 8); if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); } } op = cpy; /* wildcopy correction */ @@ -2056,6 +2152,7 @@ LZ4_decompress_generic( /* end of decoding */ if (endOnInput) { + DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst)); return (int) (((char*)op)-dst); /* Nb of output bytes decoded */ } else { return (int) (((const char*)ip)-src); /* Nb of input bytes read */ @@ -2070,7 +2167,7 @@ LZ4_decompress_generic( /*===== Instantiate the API decoding functions. 
=====*/ -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize, @@ -2078,7 +2175,7 @@ int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int (BYTE*)dest, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity) { dstCapacity = MIN(targetOutputSize, dstCapacity); @@ -2087,7 +2184,7 @@ int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, noDict, (BYTE*)dst, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_fast(const char* source, char* dest, int originalSize) { return LZ4_decompress_generic(source, dest, 0, originalSize, @@ -2097,7 +2194,7 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize) /*===== Instantiate a few more decoding cases, used more than once. =====*/ -LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */ +LZ4_FORCE_O2 /* Exported, an obsolete API function. 
*/ int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize) { return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize, @@ -2113,7 +2210,7 @@ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int origin return LZ4_decompress_fast(source, dest, originalSize); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize, size_t prefixSize) { @@ -2122,7 +2219,7 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i (BYTE*)dest-prefixSize, NULL, 0); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, int compressedSize, int maxOutputSize, const void* dictStart, size_t dictSize) @@ -2132,7 +2229,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest, (BYTE*)dest, (const BYTE*)dictStart, dictSize); } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize, const void* dictStart, size_t dictSize) { @@ -2221,7 +2318,7 @@ int LZ4_decoderRingBufferSize(int maxBlockSize) If it's not possible, save the relevant part of decoded data into a safe buffer, and indicate where it stands using LZ4_setStreamDecode() */ -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; @@ -2261,7 +2358,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch return result; } -LZ4_FORCE_O2_GCC_PPC64LE +LZ4_FORCE_O2 int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize) { LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse; 
@@ -2374,7 +2471,7 @@ int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, /* Obsolete Streaming functions */ -int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; } +int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; } int LZ4_resetStreamState(void* state, char* inputBuffer) { diff --git a/lib/lz4.h b/lib/lz4.h index 32108e2..7ab1e48 100644 --- a/lib/lz4.h +++ b/lib/lz4.h @@ -100,7 +100,7 @@ extern "C" { /*------ Version ------*/ #define LZ4_VERSION_MAJOR 1 /* for breaking interface changes */ #define LZ4_VERSION_MINOR 9 /* for new (non-breaking) interface capabilities */ -#define LZ4_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */ +#define LZ4_VERSION_RELEASE 3 /* for tweaks, bug-fixes, or development */ #define LZ4_VERSION_NUMBER (LZ4_VERSION_MAJOR *100*100 + LZ4_VERSION_MINOR *100 + LZ4_VERSION_RELEASE) @@ -186,7 +186,8 @@ LZ4LIB_API int LZ4_compressBound(int inputSize); The larger the acceleration value, the faster the algorithm, but also the lesser the compression. It's a trade-off. It can be fine tuned, with each successive value providing roughly +~3% to speed. An acceleration value of "1" is the same as regular LZ4_compress_default() - Values <= 0 will be replaced by ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values <= 0 will be replaced by LZ4_ACCELERATION_DEFAULT (currently == 1, see lz4.c). + Values > LZ4_ACCELERATION_MAX will be replaced by LZ4_ACCELERATION_MAX (currently == 65537, see lz4.c). */ LZ4LIB_API int LZ4_compress_fast (const char* src, char* dst, int srcSize, int dstCapacity, int acceleration); @@ -212,7 +213,18 @@ LZ4LIB_API int LZ4_compress_fast_extState (void* state, const char* src, char* d * New value is necessarily <= input value. * @return : Nb bytes written into 'dst' (necessarily <= targetDestSize) * or 0 if compression fails. 
-*/ + * + * Note : from v1.8.2 to v1.9.1, this function had a bug (fixed un v1.9.2+): + * the produced compressed content could, in specific circumstances, + * require to be decompressed into a destination buffer larger + * by at least 1 byte than the content to decompress. + * If an application uses `LZ4_compress_destSize()`, + * it's highly recommended to update liblz4 to v1.9.2 or better. + * If this can't be done or ensured, + * the receiving decompression function should provide + * a dstCapacity which is > decompressedSize, by at least 1 byte. + * See https://github.com/lz4/lz4/issues/859 for details + */ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePtr, int targetDstSize); @@ -220,25 +232,35 @@ LZ4LIB_API int LZ4_compress_destSize (const char* src, char* dst, int* srcSizePt * Decompress an LZ4 compressed block, of size 'srcSize' at position 'src', * into destination buffer 'dst' of size 'dstCapacity'. * Up to 'targetOutputSize' bytes will be decoded. - * The function stops decoding on reaching this objective, - * which can boost performance when only the beginning of a block is required. + * The function stops decoding on reaching this objective. + * This can be useful to boost performance + * whenever only the beginning of a block is required. * - * @return : the number of bytes decoded in `dst` (necessarily <= dstCapacity) + * @return : the number of bytes decoded in `dst` (necessarily <= targetOutputSize) * If source stream is detected malformed, function returns a negative result. * - * Note : @return can be < targetOutputSize, if compressed block contains less data. + * Note 1 : @return can be < targetOutputSize, if compressed block contains less data. * - * Note 2 : this function features 2 parameters, targetOutputSize and dstCapacity, - * and expects targetOutputSize <= dstCapacity. 
- * It effectively stops decoding on reaching targetOutputSize, + * Note 2 : targetOutputSize must be <= dstCapacity + * + * Note 3 : this function effectively stops decoding on reaching targetOutputSize, * so dstCapacity is kind of redundant. - * This is because in a previous version of this function, - * decoding operation would not "break" a sequence in the middle. - * As a consequence, there was no guarantee that decoding would stop at exactly targetOutputSize, + * This is because in older versions of this function, + * decoding operation would still write complete sequences. + * Therefore, there was no guarantee that it would stop writing at exactly targetOutputSize, * it could write more bytes, though only up to dstCapacity. * Some "margin" used to be required for this operation to work properly. - * This is no longer necessary. - * The function nonetheless keeps its signature, in an effort to not break API. + * Thankfully, this is no longer necessary. + * The function nonetheless keeps the same signature, in an effort to preserve API compatibility. + * + * Note 4 : If srcSize is the exact size of the block, + * then targetOutputSize can be any value, + * including larger than the block's decompressed size. + * The function will, at most, generate block's decompressed size. + * + * Note 5 : If srcSize is _larger_ than block's compressed size, + * then targetOutputSize **MUST** be <= block's decompressed size. + * Otherwise, *silent corruption will occur*. */ LZ4LIB_API int LZ4_decompress_safe_partial (const char* src, char* dst, int srcSize, int targetOutputSize, int dstCapacity); @@ -547,74 +569,64 @@ LZ4LIB_STATIC_API void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const #define LZ4_H_98237428734687 /*-************************************************************ - * PRIVATE DEFINITIONS + * Private Definitions ************************************************************** * Do not use these definitions directly. 
* They are only exposed to allow static allocation of `LZ4_stream_t` and `LZ4_streamDecode_t`. - * Accessing members will expose code to API and/or ABI break in future versions of the library. + * Accessing members will expose user code to API and/or ABI break in future versions of the library. **************************************************************/ #define LZ4_HASHLOG (LZ4_MEMORY_USAGE-2) #define LZ4_HASHTABLESIZE (1 << LZ4_MEMORY_USAGE) #define LZ4_HASH_SIZE_U32 (1 << LZ4_HASHLOG) /* required as macro for static allocation */ #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -#include <stdint.h> - -typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; -struct LZ4_stream_t_internal { - uint32_t hashTable[LZ4_HASH_SIZE_U32]; - uint32_t currentOffset; - uint16_t dirty; - uint16_t tableType; - const uint8_t* dictionary; - const LZ4_stream_t_internal* dictCtx; - uint32_t dictSize; -}; - -typedef struct { - const uint8_t* externalDict; - size_t extDictSize; - const uint8_t* prefixEnd; - size_t prefixSize; -} LZ4_streamDecode_t_internal; - +# include <stdint.h> + typedef int8_t LZ4_i8; + typedef uint8_t LZ4_byte; + typedef uint16_t LZ4_u16; + typedef uint32_t LZ4_u32; #else - -typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; -struct LZ4_stream_t_internal { - unsigned int hashTable[LZ4_HASH_SIZE_U32]; - unsigned int currentOffset; - unsigned short dirty; - unsigned short tableType; - const unsigned char* dictionary; - const LZ4_stream_t_internal* dictCtx; - unsigned int dictSize; -}; - -typedef struct { - const unsigned char* externalDict; - const unsigned char* prefixEnd; - size_t extDictSize; - size_t prefixSize; -} LZ4_streamDecode_t_internal; - + typedef signed char LZ4_i8; + typedef unsigned char LZ4_byte; + typedef unsigned short LZ4_u16; + typedef unsigned int LZ4_u32; #endif +typedef struct LZ4_stream_t_internal LZ4_stream_t_internal; +struct LZ4_stream_t_internal { + LZ4_u32 hashTable[LZ4_HASH_SIZE_U32]; + 
LZ4_u32 currentOffset; + LZ4_u32 tableType; + const LZ4_byte* dictionary; + const LZ4_stream_t_internal* dictCtx; + LZ4_u32 dictSize; +}; + +typedef struct { + const LZ4_byte* externalDict; + size_t extDictSize; + const LZ4_byte* prefixEnd; + size_t prefixSize; +} LZ4_streamDecode_t_internal; + + /*! LZ4_stream_t : - * information structure to track an LZ4 stream. + * Do not use below internal definitions directly ! + * Declare or allocate an LZ4_stream_t instead. * LZ4_stream_t can also be created using LZ4_createStream(), which is recommended. * The structure definition can be convenient for static allocation * (on stack, or as part of larger structure). * Init this structure with LZ4_initStream() before first use. * note : only use this definition in association with static linking ! - * this definition is not API/ABI safe, and may change in a future version. + * this definition is not API/ABI safe, and may change in future versions. */ -#define LZ4_STREAMSIZE_U64 ((1 << (LZ4_MEMORY_USAGE-3)) + 4 + ((sizeof(void*)==16) ? 4 : 0) /*AS-400*/ ) -#define LZ4_STREAMSIZE (LZ4_STREAMSIZE_U64 * sizeof(unsigned long long)) +#define LZ4_STREAMSIZE 16416 /* static size, for inter-version compatibility */ +#define LZ4_STREAMSIZE_VOIDP (LZ4_STREAMSIZE / sizeof(void*)) union LZ4_stream_u { - unsigned long long table[LZ4_STREAMSIZE_U64]; + void* table[LZ4_STREAMSIZE_VOIDP]; LZ4_stream_t_internal internal_donotuse; -} ; /* previously typedef'd to LZ4_stream_t */ +}; /* previously typedef'd to LZ4_stream_t */ + /*! LZ4_initStream() : v1.9.0+ * An LZ4_stream_t structure must be initialized at least once. 
@@ -667,22 +679,21 @@ union LZ4_streamDecode_u { #ifdef LZ4_DISABLE_DEPRECATE_WARNINGS # define LZ4_DEPRECATED(message) /* disable deprecation warnings */ #else -# define LZ4_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) # if defined (__cplusplus) && (__cplusplus >= 201402) /* C++14 or greater */ # define LZ4_DEPRECATED(message) [[deprecated(message)]] -# elif (LZ4_GCC_VERSION >= 405) || defined(__clang__) -# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) -# elif (LZ4_GCC_VERSION >= 301) -# define LZ4_DEPRECATED(message) __attribute__((deprecated)) # elif defined(_MSC_VER) # define LZ4_DEPRECATED(message) __declspec(deprecated(message)) +# elif defined(__clang__) || (defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 45)) +# define LZ4_DEPRECATED(message) __attribute__((deprecated(message))) +# elif defined(__GNUC__) && (__GNUC__ * 10 + __GNUC_MINOR__ >= 31) +# define LZ4_DEPRECATED(message) __attribute__((deprecated)) # else -# pragma message("WARNING: You need to implement LZ4_DEPRECATED for this compiler") -# define LZ4_DEPRECATED(message) +# pragma message("WARNING: LZ4_DEPRECATED needs custom implementation for this compiler") +# define LZ4_DEPRECATED(message) /* disabled */ # endif #endif /* LZ4_DISABLE_DEPRECATE_WARNINGS */ -/* Obsolete compression functions */ +/*! 
Obsolete compression functions (since v1.7.3) */ LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress (const char* src, char* dest, int srcSize); LZ4_DEPRECATED("use LZ4_compress_default() instead") LZ4LIB_API int LZ4_compress_limitedOutput (const char* src, char* dest, int srcSize, int maxOutputSize); LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_compress_withState (void* state, const char* source, char* dest, int inputSize); @@ -690,11 +701,12 @@ LZ4_DEPRECATED("use LZ4_compress_fast_extState() instead") LZ4LIB_API int LZ4_co LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize); LZ4_DEPRECATED("use LZ4_compress_fast_continue() instead") LZ4LIB_API int LZ4_compress_limitedOutput_continue (LZ4_stream_t* LZ4_streamPtr, const char* source, char* dest, int inputSize, int maxOutputSize); -/* Obsolete decompression functions */ +/*! Obsolete decompression functions (since v1.8.0) */ LZ4_DEPRECATED("use LZ4_decompress_fast() instead") LZ4LIB_API int LZ4_uncompress (const char* source, char* dest, int outputSize); LZ4_DEPRECATED("use LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize); -/* Obsolete streaming functions; degraded functionality; do not use! +/* Obsolete streaming functions (since v1.7.0) + * degraded functionality; do not use! * * In order to perform streaming compression, these functions depended on data * that is no longer tracked in the state. 
They have been preserved as well as @@ -708,23 +720,22 @@ LZ4_DEPRECATED("Use LZ4_createStream() instead") LZ4LIB_API int LZ4_sizeofStre LZ4_DEPRECATED("Use LZ4_resetStream() instead") LZ4LIB_API int LZ4_resetStreamState(void* state, char* inputBuffer); LZ4_DEPRECATED("Use LZ4_saveDict() instead") LZ4LIB_API char* LZ4_slideInputBuffer (void* state); -/* Obsolete streaming decoding functions */ +/*! Obsolete streaming decoding functions (since v1.7.0) */ LZ4_DEPRECATED("use LZ4_decompress_safe_usingDict() instead") LZ4LIB_API int LZ4_decompress_safe_withPrefix64k (const char* src, char* dst, int compressedSize, int maxDstSize); LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4_decompress_fast_withPrefix64k (const char* src, char* dst, int originalSize); -/*! LZ4_decompress_fast() : **unsafe!** +/*! Obsolete LZ4_decompress_fast variants (since v1.9.0) : * These functions used to be faster than LZ4_decompress_safe(), - * but it has changed, and they are now slower than LZ4_decompress_safe(). + * but this is no longer the case. They are now slower. * This is because LZ4_decompress_fast() doesn't know the input size, - * and therefore must progress more cautiously in the input buffer to not read beyond the end of block. + * and therefore must progress more cautiously into the input buffer to not read beyond the end of block. * On top of that `LZ4_decompress_fast()` is not protected vs malformed or malicious inputs, making it a security liability. * As a consequence, LZ4_decompress_fast() is strongly discouraged, and deprecated. * * The last remaining LZ4_decompress_fast() specificity is that * it can decompress a block without knowing its compressed size. - * Such functionality could be achieved in a more secure manner, - * by also providing the maximum size of input buffer, - * but it would require new prototypes, and adaptation of the implementation to this new use case. 
+ * Such functionality can be achieved in a more secure manner + * by employing LZ4_decompress_safe_partial(). * * Parameters: * originalSize : is the uncompressed size to regenerate. @@ -739,7 +750,6 @@ LZ4_DEPRECATED("use LZ4_decompress_fast_usingDict() instead") LZ4LIB_API int LZ4 * But they may happen if input data is invalid (error or intentional tampering). * As a consequence, use these functions in trusted environments with trusted data **only**. */ - LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe() instead") LZ4LIB_API int LZ4_decompress_fast (const char* src, char* dst, int originalSize); LZ4_DEPRECATED("This function is deprecated and unsafe. Consider using LZ4_decompress_safe_continue() instead") diff --git a/lib/lz4frame.c b/lib/lz4frame.c index c9f630d..ec02c92 100644 --- a/lib/lz4frame.c +++ b/lib/lz4frame.c @@ -71,8 +71,8 @@ * towards another library or solution of their choice * by modifying below section. */ -#include <stdlib.h> /* malloc, calloc, free */ #ifndef LZ4_SRC_INCLUDED /* avoid redefinition when sources are coalesced */ +# include <stdlib.h> /* malloc, calloc, free */ # define ALLOC(s) malloc(s) # define ALLOC_AND_ZERO(s) calloc(1,(s)) # define FREEMEM(p) free(p) @@ -533,7 +533,7 @@ void LZ4F_freeCDict(LZ4F_CDict* cdict) * If the result LZ4F_errorCode_t is not OK_NoError, there was an error during context creation. 
* Object can release its memory using LZ4F_freeCompressionContext(); */ -LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_compressionContextPtr, unsigned version) +LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_cctx** LZ4F_compressionContextPtr, unsigned version) { LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)ALLOC_AND_ZERO(sizeof(LZ4F_cctx_t)); if (cctxPtr==NULL) return err0r(LZ4F_ERROR_allocation_failed); @@ -541,20 +541,18 @@ LZ4F_errorCode_t LZ4F_createCompressionContext(LZ4F_compressionContext_t* LZ4F_c cctxPtr->version = version; cctxPtr->cStage = 0; /* Next stage : init stream */ - *LZ4F_compressionContextPtr = (LZ4F_compressionContext_t)cctxPtr; + *LZ4F_compressionContextPtr = cctxPtr; return LZ4F_OK_NoError; } -LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_compressionContext_t LZ4F_compressionContext) +LZ4F_errorCode_t LZ4F_freeCompressionContext(LZ4F_cctx* cctxPtr) { - LZ4F_cctx_t* const cctxPtr = (LZ4F_cctx_t*)LZ4F_compressionContext; - if (cctxPtr != NULL) { /* support free on NULL */ - FREEMEM(cctxPtr->lz4CtxPtr); /* works because LZ4_streamHC_t and LZ4_stream_t are simple POD types */ + FREEMEM(cctxPtr->lz4CtxPtr); /* note: LZ4_streamHC_t and LZ4_stream_t are simple POD types */ FREEMEM(cctxPtr->tmpBuff); - FREEMEM(LZ4F_compressionContext); + FREEMEM(cctxPtr); } return LZ4F_OK_NoError; @@ -725,6 +723,9 @@ size_t LZ4F_compressBegin(LZ4F_cctx* cctxPtr, */ size_t LZ4F_compressBound(size_t srcSize, const LZ4F_preferences_t* preferencesPtr) { + if (preferencesPtr && preferencesPtr->autoFlush) { + return LZ4F_compressBound_internal(srcSize, preferencesPtr, 0); + } return LZ4F_compressBound_internal(srcSize, preferencesPtr, (size_t)-1); } @@ -747,6 +748,7 @@ static size_t LZ4F_makeBlock(void* dst, (int)(srcSize), (int)(srcSize-1), level, cdict); if (cSize == 0) { /* compression failed */ + DEBUGLOG(5, "LZ4F_makeBlock: compression failed, creating a raw block (size %u)", (U32)srcSize); cSize = (U32)srcSize; 
LZ4F_writeLE32(cSizePtr, cSize | LZ4F_BLOCKUNCOMPRESSED_FLAG); memcpy(cSizePtr+BHSize, src, srcSize); @@ -989,6 +991,7 @@ size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr, BYTE* dstPtr = dstStart; size_t const flushSize = LZ4F_flush(cctxPtr, dstBuffer, dstCapacity, compressOptionsPtr); + DEBUGLOG(5,"LZ4F_compressEnd: dstCapacity=%u", (unsigned)dstCapacity); if (LZ4F_isError(flushSize)) return flushSize; dstPtr += flushSize; @@ -1002,6 +1005,7 @@ size_t LZ4F_compressEnd(LZ4F_cctx* cctxPtr, if (cctxPtr->prefs.frameInfo.contentChecksumFlag == LZ4F_contentChecksumEnabled) { U32 const xxh = XXH32_digest(&(cctxPtr->xxh)); if (dstCapacity < 8) return err0r(LZ4F_ERROR_dstMaxSize_tooSmall); + DEBUGLOG(5,"Writing 32-bit content checksum"); LZ4F_writeLE32(dstPtr, xxh); dstPtr+=4; /* content Checksum */ } @@ -1112,6 +1116,7 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize size_t frameHeaderSize; const BYTE* srcPtr = (const BYTE*)src; + DEBUGLOG(5, "LZ4F_decodeHeader"); /* need to decode header to get frameInfo */ if (srcSize < minFHSize) return err0r(LZ4F_ERROR_frameHeader_incomplete); /* minimal frame header size */ MEM_INIT(&(dctx->frameInfo), 0, sizeof(dctx->frameInfo)); @@ -1132,8 +1137,10 @@ static size_t LZ4F_decodeHeader(LZ4F_dctx* dctx, const void* src, size_t srcSize /* control magic number */ #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) + if (LZ4F_readLE32(srcPtr) != LZ4F_MAGICNUMBER) { + DEBUGLOG(4, "frame header error : unknown magic number"); return err0r(LZ4F_ERROR_frameType_unknown); + } #endif dctx->frameInfo.frameType = LZ4F_frame; @@ -1282,15 +1289,20 @@ LZ4F_errorCode_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, /* LZ4F_updateDict() : - * only used for LZ4F_blockLinked mode */ + * only used for LZ4F_blockLinked mode + * Condition : dstPtr != NULL + */ static void LZ4F_updateDict(LZ4F_dctx* dctx, const BYTE* dstPtr, size_t dstSize, const BYTE* dstBufferStart, unsigned withinTmp) { - if 
(dctx->dictSize==0) - dctx->dict = (const BYTE*)dstPtr; /* priority to dictionary continuity */ + assert(dstPtr != NULL); + if (dctx->dictSize==0) { + dctx->dict = (const BYTE*)dstPtr; /* priority to prefix mode */ + } + assert(dctx->dict != NULL); - if (dctx->dict + dctx->dictSize == dstPtr) { /* dictionary continuity, directly within dstBuffer */ + if (dctx->dict + dctx->dictSize == dstPtr) { /* prefix mode, everything within dstBuffer */ dctx->dictSize += dstSize; return; } @@ -1304,9 +1316,10 @@ static void LZ4F_updateDict(LZ4F_dctx* dctx, assert(dstSize < 64 KB); /* if dstSize >= 64 KB, dictionary would be set into dstBuffer directly */ - /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOut */ + /* dstBuffer does not contain whole useful history (64 KB), so it must be saved within tmpOutBuffer */ + assert(dctx->tmpOutBuffer != NULL); - if ((withinTmp) && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */ + if (withinTmp && (dctx->dict == dctx->tmpOutBuffer)) { /* continue history within tmpOutBuffer */ /* withinTmp expectation : content of [dstPtr,dstSize] is same as [dict+dictSize,dstSize], so we just extend it */ assert(dctx->dict + dctx->dictSize == dctx->tmpOut + dctx->tmpOutStart); dctx->dictSize += dstSize; @@ -1378,17 +1391,21 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, const BYTE* const srcEnd = srcStart + *srcSizePtr; const BYTE* srcPtr = srcStart; BYTE* const dstStart = (BYTE*)dstBuffer; - BYTE* const dstEnd = dstStart + *dstSizePtr; + BYTE* const dstEnd = dstStart ? 
dstStart + *dstSizePtr : NULL; BYTE* dstPtr = dstStart; const BYTE* selectedIn = NULL; unsigned doAnotherStage = 1; size_t nextSrcSizeHint = 1; + DEBUGLOG(5, "LZ4F_decompress : %p,%u => %p,%u", + srcBuffer, (unsigned)*srcSizePtr, dstBuffer, (unsigned)*dstSizePtr); + if (dstBuffer == NULL) assert(*dstSizePtr == 0); MEM_INIT(&optionsNull, 0, sizeof(optionsNull)); if (decompressOptionsPtr==NULL) decompressOptionsPtr = &optionsNull; *srcSizePtr = 0; *dstSizePtr = 0; + assert(dctx != NULL); /* behaves as a state machine */ @@ -1398,6 +1415,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, { case dstage_getFrameHeader: + DEBUGLOG(6, "dstage_getFrameHeader"); if ((size_t)(srcEnd-srcPtr) >= maxFHSize) { /* enough to decode - shortcut */ size_t const hSize = LZ4F_decodeHeader(dctx, srcPtr, (size_t)(srcEnd-srcPtr)); /* will update dStage appropriately */ if (LZ4F_isError(hSize)) return hSize; @@ -1411,6 +1429,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, /* fall-through */ case dstage_storeFrameHeader: + DEBUGLOG(6, "dstage_storeFrameHeader"); { size_t const sizeToCopy = MIN(dctx->tmpInTarget - dctx->tmpInSize, (size_t)(srcEnd - srcPtr)); memcpy(dctx->header + dctx->tmpInSize, srcPtr, sizeToCopy); dctx->tmpInSize += sizeToCopy; @@ -1427,6 +1446,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, break; case dstage_init: + DEBUGLOG(6, "dstage_init"); if (dctx->frameInfo.contentChecksumFlag) (void)XXH32_reset(&(dctx->xxh), 0); /* internal buffers allocation */ { size_t const bufferNeeded = dctx->maxBlockSize @@ -1480,17 +1500,21 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, } /* if (dctx->dStage == dstage_storeBlockHeader) */ /* decode block header */ - { size_t const nextCBlockSize = LZ4F_readLE32(selectedIn) & 0x7FFFFFFFU; + { U32 const blockHeader = LZ4F_readLE32(selectedIn); + size_t const nextCBlockSize = blockHeader & 0x7FFFFFFFU; size_t const crcSize = dctx->frameInfo.blockChecksumFlag * BFSize; - if (nextCBlockSize==0) { /* frameEnd signal, no more block */ + if (blockHeader==0) { /* 
frameEnd signal, no more block */ + DEBUGLOG(5, "end of frame"); dctx->dStage = dstage_getSuffix; break; } - if (nextCBlockSize > dctx->maxBlockSize) + if (nextCBlockSize > dctx->maxBlockSize) { return err0r(LZ4F_ERROR_maxBlockSize_invalid); - if (LZ4F_readLE32(selectedIn) & LZ4F_BLOCKUNCOMPRESSED_FLAG) { + } + if (blockHeader & LZ4F_BLOCKUNCOMPRESSED_FLAG) { /* next block is uncompressed */ dctx->tmpInTarget = nextCBlockSize; + DEBUGLOG(5, "next block is uncompressed (size %u)", (U32)nextCBlockSize); if (dctx->frameInfo.blockChecksumFlag) { (void)XXH32_reset(&dctx->blockChecksum, 0); } @@ -1508,20 +1532,26 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, } case dstage_copyDirect: /* uncompressed block */ - { size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr)); - size_t const sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize); - memcpy(dstPtr, srcPtr, sizeToCopy); - if (dctx->frameInfo.blockChecksumFlag) { - (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy); - } - if (dctx->frameInfo.contentChecksumFlag) - (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy); - if (dctx->frameInfo.contentSize) - dctx->frameRemainingSize -= sizeToCopy; + DEBUGLOG(6, "dstage_copyDirect"); + { size_t sizeToCopy; + if (dstPtr == NULL) { + sizeToCopy = 0; + } else { + size_t const minBuffSize = MIN((size_t)(srcEnd-srcPtr), (size_t)(dstEnd-dstPtr)); + sizeToCopy = MIN(dctx->tmpInTarget, minBuffSize); + memcpy(dstPtr, srcPtr, sizeToCopy); + if (dctx->frameInfo.blockChecksumFlag) { + (void)XXH32_update(&dctx->blockChecksum, srcPtr, sizeToCopy); + } + if (dctx->frameInfo.contentChecksumFlag) + (void)XXH32_update(&dctx->xxh, srcPtr, sizeToCopy); + if (dctx->frameInfo.contentSize) + dctx->frameRemainingSize -= sizeToCopy; - /* history management (linked blocks only)*/ - if (dctx->frameInfo.blockMode == LZ4F_blockLinked) - LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0); + /* history management (linked blocks only)*/ + if (dctx->frameInfo.blockMode == 
LZ4F_blockLinked) { + LZ4F_updateDict(dctx, dstPtr, sizeToCopy, dstStart, 0); + } } srcPtr += sizeToCopy; dstPtr += sizeToCopy; @@ -1534,15 +1564,16 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, break; } dctx->tmpInTarget -= sizeToCopy; /* need to copy more */ - nextSrcSizeHint = dctx->tmpInTarget + - +(dctx->frameInfo.blockChecksumFlag ? BFSize : 0) - + BHSize /* next header size */; - doAnotherStage = 0; - break; } + nextSrcSizeHint = dctx->tmpInTarget + + +(dctx->frameInfo.blockChecksumFlag ? BFSize : 0) + + BHSize /* next header size */; + doAnotherStage = 0; + break; /* check block checksum for recently transferred uncompressed block */ case dstage_getBlockChecksum: + DEBUGLOG(6, "dstage_getBlockChecksum"); { const void* crcSrc; if ((srcEnd-srcPtr >= 4) && (dctx->tmpInSize==0)) { crcSrc = srcPtr; @@ -1562,8 +1593,12 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, { U32 const readCRC = LZ4F_readLE32(crcSrc); U32 const calcCRC = XXH32_digest(&dctx->blockChecksum); #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION - if (readCRC != calcCRC) + DEBUGLOG(6, "compare block checksum"); + if (readCRC != calcCRC) { + DEBUGLOG(4, "incorrect block checksum: %08X != %08X", + readCRC, calcCRC); return err0r(LZ4F_ERROR_blockChecksum_invalid); + } #else (void)readCRC; (void)calcCRC; @@ -1573,6 +1608,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, break; case dstage_getCBlock: + DEBUGLOG(6, "dstage_getCBlock"); if ((size_t)(srcEnd-srcPtr) < dctx->tmpInTarget) { dctx->tmpInSize = 0; dctx->dStage = dstage_storeCBlock; @@ -1582,7 +1618,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, selectedIn = srcPtr; srcPtr += dctx->tmpInTarget; - if (0) /* jump over next block */ + if (0) /* always jump over next block */ case dstage_storeCBlock: { size_t const wantedData = dctx->tmpInTarget - dctx->tmpInSize; size_t const inputLeft = (size_t)(srcEnd-srcPtr); @@ -1619,6 +1655,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, const char* dict = (const char*)dctx->dict; size_t dictSize = dctx->dictSize; int 
decodedSize; + assert(dstPtr != NULL); if (dict && dictSize > 1 GB) { /* the dictSize param is an int, avoid truncation / sign issues */ dict += dictSize - 64 KB; @@ -1636,8 +1673,9 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, dctx->frameRemainingSize -= (size_t)decodedSize; /* dictionary management */ - if (dctx->frameInfo.blockMode==LZ4F_blockLinked) + if (dctx->frameInfo.blockMode==LZ4F_blockLinked) { LZ4F_updateDict(dctx, dstPtr, (size_t)decodedSize, dstStart, 0); + } dstPtr += decodedSize; dctx->dStage = dstage_getBlockHeader; @@ -1684,7 +1722,9 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, /* fall-through */ case dstage_flushOut: /* flush decoded data from tmpOut to dstBuffer */ - { size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr)); + DEBUGLOG(6, "dstage_flushOut"); + if (dstPtr != NULL) { + size_t const sizeToCopy = MIN(dctx->tmpOutSize - dctx->tmpOutStart, (size_t)(dstEnd-dstPtr)); memcpy(dstPtr, dctx->tmpOut + dctx->tmpOutStart, sizeToCopy); /* dictionary management */ @@ -1693,16 +1733,15 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, dctx->tmpOutStart += sizeToCopy; dstPtr += sizeToCopy; - - if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */ - dctx->dStage = dstage_getBlockHeader; /* get next block */ - break; - } - /* could not flush everything : stop there, just request a block header */ - doAnotherStage = 0; - nextSrcSizeHint = BHSize; + } + if (dctx->tmpOutStart == dctx->tmpOutSize) { /* all flushed */ + dctx->dStage = dstage_getBlockHeader; /* get next block */ break; } + /* could not flush everything : stop there, just request a block header */ + doAnotherStage = 0; + nextSrcSizeHint = BHSize; + break; case dstage_getSuffix: if (dctx->frameRemainingSize) @@ -1806,6 +1845,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, LZ4F_STATIC_ASSERT((unsigned)dstage_init == 2); if ( (dctx->frameInfo.blockMode==LZ4F_blockLinked) /* next block will use up to 64KB from previous ones */ && (dctx->dict != 
dctx->tmpOutBuffer) /* dictionary is not already within tmp */ + && (dctx->dict != NULL) /* dictionary exists */ && (!decompressOptionsPtr->stableDst) /* cannot rely on dst data to remain there for next call */ && ((unsigned)(dctx->dStage)-2 < (unsigned)(dstage_getSuffix)-2) ) /* valid stages : [init ... getSuffix[ */ { @@ -1815,9 +1855,9 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, const BYTE* oldDictEnd = dctx->dict + dctx->dictSize - dctx->tmpOutStart; if (dctx->tmpOutSize > 64 KB) copySize = 0; if (copySize > preserveSize) copySize = preserveSize; + assert(dctx->tmpOutBuffer != NULL); - if (copySize > 0) - memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize); + memcpy(dctx->tmpOutBuffer + preserveSize - copySize, oldDictEnd - copySize, copySize); dctx->dict = dctx->tmpOutBuffer; dctx->dictSize = preserveSize + dctx->tmpOutStart; @@ -1825,8 +1865,7 @@ size_t LZ4F_decompress(LZ4F_dctx* dctx, const BYTE* const oldDictEnd = dctx->dict + dctx->dictSize; size_t const newDictSize = MIN(dctx->dictSize, 64 KB); - if (newDictSize > 0) - memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize); + memcpy(dctx->tmpOutBuffer, oldDictEnd - newDictSize, newDictSize); dctx->dict = dctx->tmpOutBuffer; dctx->dictSize = newDictSize; diff --git a/lib/lz4frame.h b/lib/lz4frame.h index 391e484..4573317 100644 --- a/lib/lz4frame.h +++ b/lib/lz4frame.h @@ -66,17 +66,22 @@ extern "C" { *****************************************************************/ /* LZ4_DLL_EXPORT : * Enable exporting of functions when building a Windows DLL - * LZ4FLIB_API : + * LZ4FLIB_VISIBILITY : * Control library symbols visibility. 
*/ +#ifndef LZ4FLIB_VISIBILITY +# if defined(__GNUC__) && (__GNUC__ >= 4) +# define LZ4FLIB_VISIBILITY __attribute__ ((visibility ("default"))) +# else +# define LZ4FLIB_VISIBILITY +# endif +#endif #if defined(LZ4_DLL_EXPORT) && (LZ4_DLL_EXPORT==1) -# define LZ4FLIB_API __declspec(dllexport) +# define LZ4FLIB_API __declspec(dllexport) LZ4FLIB_VISIBILITY #elif defined(LZ4_DLL_IMPORT) && (LZ4_DLL_IMPORT==1) -# define LZ4FLIB_API __declspec(dllimport) -#elif defined(__GNUC__) && (__GNUC__ >= 4) -# define LZ4FLIB_API __attribute__ ((__visibility__ ("default"))) +# define LZ4FLIB_API __declspec(dllimport) LZ4FLIB_VISIBILITY #else -# define LZ4FLIB_API +# define LZ4FLIB_API LZ4FLIB_VISIBILITY #endif #ifdef LZ4F_DISABLE_DEPRECATE_WARNINGS @@ -103,7 +108,7 @@ LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /**< return /*-************************************ * Frame compression types - **************************************/ + ************************************* */ /* #define LZ4F_ENABLE_OBSOLETE_ENUMS // uncomment to enable obsolete enums */ #ifdef LZ4F_ENABLE_OBSOLETE_ENUMS # define LZ4F_OBSOLETE_ENUM(x) , LZ4F_DEPRECATE(x) = LZ4F_##x @@ -113,7 +118,8 @@ LZ4FLIB_API const char* LZ4F_getErrorName(LZ4F_errorCode_t code); /**< return /* The larger the block size, the (slightly) better the compression ratio, * though there are diminishing returns. - * Larger blocks also increase memory usage on both compression and decompression sides. */ + * Larger blocks also increase memory usage on both compression and decompression sides. + */ typedef enum { LZ4F_default=0, LZ4F_max64KB=4, @@ -284,7 +290,7 @@ LZ4FLIB_API size_t LZ4F_compressBegin(LZ4F_cctx* cctx, * @return is always the same for a srcSize and prefsPtr. * prefsPtr is optional : when NULL is provided, preferences will be set to cover worst case scenario. * tech details : - * @return includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes. 
+ * @return if automatic flushing is not enabled, includes the possibility that internal buffer might already be filled by up to (blockSize-1) bytes. * It also includes frame footer (ending + checksum), since it might be generated by LZ4F_compressEnd(). * @return doesn't include frame header, as it was already generated by LZ4F_compressBegin(). */ @@ -376,7 +382,7 @@ LZ4FLIB_API LZ4F_errorCode_t LZ4F_freeDecompressionContext(LZ4F_dctx* dctx); * note : Frame header size is variable, but is guaranteed to be * >= LZ4F_HEADER_SIZE_MIN bytes, and <= LZ4F_HEADER_SIZE_MAX bytes. */ -size_t LZ4F_headerSize(const void* src, size_t srcSize); +LZ4FLIB_API size_t LZ4F_headerSize(const void* src, size_t srcSize); /*! LZ4F_getFrameInfo() : * This function extracts frame parameters (max blockSize, dictID, etc.). @@ -426,8 +432,10 @@ LZ4FLIB_API size_t LZ4F_getFrameInfo(LZ4F_dctx* dctx, const void* srcBuffer, size_t* srcSizePtr); /*! LZ4F_decompress() : - * Call this function repetitively to regenerate compressed data from `srcBuffer`. - * The function will read up to *srcSizePtr bytes from srcBuffer, + * Call this function repetitively to regenerate data compressed in `srcBuffer`. + * + * The function requires a valid dctx state. + * It will read up to *srcSizePtr bytes from srcBuffer, * and decompress data into dstBuffer, of capacity *dstSizePtr. * * The nb of bytes consumed from srcBuffer will be written into *srcSizePtr (necessarily <= original value). @@ -493,9 +501,9 @@ extern "C" { * Use at your own risk. 
*/ #ifdef LZ4F_PUBLISH_STATIC_FUNCTIONS -#define LZ4FLIB_STATIC_API LZ4FLIB_API +# define LZ4FLIB_STATIC_API LZ4FLIB_API #else -#define LZ4FLIB_STATIC_API +# define LZ4FLIB_STATIC_API #endif diff --git a/lib/lz4hc.c b/lib/lz4hc.c index 5922ed7..77c9f43 100644 --- a/lib/lz4hc.c +++ b/lib/lz4hc.c @@ -53,7 +53,7 @@ #include "lz4hc.h" -/*=== Common LZ4 definitions ===*/ +/*=== Common definitions ===*/ #if defined(__GNUC__) # pragma GCC diagnostic ignored "-Wunused-function" #endif @@ -61,15 +61,16 @@ # pragma clang diagnostic ignored "-Wunused-function" #endif -/*=== Enums ===*/ -typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; - - #define LZ4_COMMONDEFS_ONLY #ifndef LZ4_SRC_INCLUDED #include "lz4.c" /* LZ4_count, constants, mem */ #endif + +/*=== Enums ===*/ +typedef enum { noDictCtx, usingDictCtxHc } dictCtx_directive; + + /*=== Constants ===*/ #define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH) #define LZ4_OPT_NUM (1<<12) @@ -92,7 +93,7 @@ static U32 LZ4HC_hashPtr(const void* ptr) { return HASH_FUNCTION(LZ4_read32(ptr) **************************************/ static void LZ4HC_clearTables (LZ4HC_CCtx_internal* hc4) { - MEM_INIT((void*)hc4->hashTable, 0, sizeof(hc4->hashTable)); + MEM_INIT(hc4->hashTable, 0, sizeof(hc4->hashTable)); MEM_INIT(hc4->chainTable, 0xFF, sizeof(hc4->chainTable)); } @@ -161,8 +162,7 @@ int LZ4HC_countBack(const BYTE* const ip, const BYTE* const match, static U32 LZ4HC_rotatePattern(size_t const rotate, U32 const pattern) { size_t const bitsToRotate = (rotate & (sizeof(pattern) - 1)) << 3; - if (bitsToRotate == 0) - return pattern; + if (bitsToRotate == 0) return pattern; return LZ4HC_rotl32(pattern, (int)bitsToRotate); } @@ -172,7 +172,8 @@ static unsigned LZ4HC_countPattern(const BYTE* ip, const BYTE* const iEnd, U32 const pattern32) { const BYTE* const iStart = ip; - reg_t const pattern = (sizeof(pattern)==8) ? (reg_t)pattern32 + (((reg_t)pattern32) << 32) : pattern32; + reg_t const pattern = (sizeof(pattern)==8) ? 
+ (reg_t)pattern32 + (((reg_t)pattern32) << (sizeof(pattern)*4)) : pattern32; while (likely(ip < iEnd-(sizeof(pattern)-1))) { reg_t const diff = LZ4_read_ARCH(ip) ^ pattern; @@ -270,7 +271,7 @@ LZ4HC_InsertAndGetWiderMatch ( DEBUGLOG(7, "First match at index %u / %u (lowestMatchIndex)", matchIndex, lowestMatchIndex); - while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) { + while ((matchIndex>=lowestMatchIndex) && (nbAttempts>0)) { int matchLength=0; nbAttempts--; assert(matchIndex < ipIndex); @@ -389,8 +390,8 @@ LZ4HC_InsertAndGetWiderMatch ( if (lookBackLength==0) { /* no back possible */ size_t const maxML = MIN(currentSegmentLength, srcPatternLength); if ((size_t)longest < maxML) { - assert(base + matchIndex < ip); - if (ip - (base+matchIndex) > LZ4_DISTANCE_MAX) break; + assert(base + matchIndex != ip); + if ((size_t)(ip - base) - matchIndex > LZ4_DISTANCE_MAX) break; assert(maxML < 2 GB); longest = (int)maxML; *matchpos = base + matchIndex; /* virtual pos, relative to ip, to retrieve offset */ @@ -410,7 +411,7 @@ LZ4HC_InsertAndGetWiderMatch ( } /* while ((matchIndex>=lowestMatchIndex) && (nbAttempts)) */ if ( dict == usingDictCtxHc - && nbAttempts + && nbAttempts > 0 && ipIndex - lowestMatchIndex < LZ4_DISTANCE_MAX) { size_t const dictEndOffset = (size_t)(dictCtx->end - dictCtx->base); U32 dictMatchIndex = dictCtx->hashTable[LZ4HC_hashPtr(ip)]; @@ -460,74 +461,90 @@ int LZ4HC_InsertAndFindBestMatch(LZ4HC_CCtx_internal* const hc4, /* Index tabl * @return : 0 if ok, * 1 if buffer issue detected */ LZ4_FORCE_INLINE int LZ4HC_encodeSequence ( - const BYTE** ip, - BYTE** op, - const BYTE** anchor, + const BYTE** _ip, + BYTE** _op, + const BYTE** _anchor, int matchLength, const BYTE* const match, limitedOutput_directive limit, BYTE* oend) { +#define ip (*_ip) +#define op (*_op) +#define anchor (*_anchor) + size_t length; - BYTE* const token = (*op)++; + BYTE* const token = op++; #if defined(LZ4_DEBUG) && (LZ4_DEBUG >= 6) static const BYTE* start = NULL; static 
U32 totalCost = 0; - U32 const pos = (start==NULL) ? 0 : (U32)(*anchor - start); - U32 const ll = (U32)(*ip - *anchor); + U32 const pos = (start==NULL) ? 0 : (U32)(anchor - start); + U32 const ll = (U32)(ip - anchor); U32 const llAdd = (ll>=15) ? ((ll-15) / 255) + 1 : 0; U32 const mlAdd = (matchLength>=19) ? ((matchLength-19) / 255) + 1 : 0; U32 const cost = 1 + llAdd + ll + 2 + mlAdd; - if (start==NULL) start = *anchor; /* only works for single segment */ + if (start==NULL) start = anchor; /* only works for single segment */ /* g_debuglog_enable = (pos >= 2228) & (pos <= 2262); */ - DEBUGLOG(6, "pos:%7u -- literals:%3u, match:%4i, offset:%5u, cost:%3u + %u", + DEBUGLOG(6, "pos:%7u -- literals:%4u, match:%4i, offset:%5u, cost:%4u + %5u", pos, - (U32)(*ip - *anchor), matchLength, (U32)(*ip-match), + (U32)(ip - anchor), matchLength, (U32)(ip-match), cost, totalCost); totalCost += cost; #endif /* Encode Literal length */ - length = (size_t)(*ip - *anchor); - if ((limit) && ((*op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) return 1; /* Check output limit */ + length = (size_t)(ip - anchor); + LZ4_STATIC_ASSERT(notLimited == 0); + /* Check output limit */ + if (limit && ((op + (length / 255) + length + (2 + 1 + LASTLITERALS)) > oend)) { + DEBUGLOG(6, "Not enough room to write %i literals (%i bytes remaining)", + (int)length, (int)(oend - op)); + return 1; + } if (length >= RUN_MASK) { size_t len = length - RUN_MASK; *token = (RUN_MASK << ML_BITS); - for(; len >= 255 ; len -= 255) *(*op)++ = 255; - *(*op)++ = (BYTE)len; + for(; len >= 255 ; len -= 255) *op++ = 255; + *op++ = (BYTE)len; } else { *token = (BYTE)(length << ML_BITS); } /* Copy Literals */ - LZ4_wildCopy8(*op, *anchor, (*op) + length); - *op += length; + LZ4_wildCopy8(op, anchor, op + length); + op += length; /* Encode Offset */ - assert( (*ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ - LZ4_writeLE16(*op, 
(U16)(*ip-match)); *op += 2; + assert( (ip - match) <= LZ4_DISTANCE_MAX ); /* note : consider providing offset as a value, rather than as a pointer difference */ + LZ4_writeLE16(op, (U16)(ip - match)); op += 2; /* Encode MatchLength */ assert(matchLength >= MINMATCH); length = (size_t)matchLength - MINMATCH; - if ((limit) && (*op + (length / 255) + (1 + LASTLITERALS) > oend)) return 1; /* Check output limit */ + if (limit && (op + (length / 255) + (1 + LASTLITERALS) > oend)) { + DEBUGLOG(6, "Not enough room to write match length"); + return 1; /* Check output limit */ + } if (length >= ML_MASK) { *token += ML_MASK; length -= ML_MASK; - for(; length >= 510 ; length -= 510) { *(*op)++ = 255; *(*op)++ = 255; } - if (length >= 255) { length -= 255; *(*op)++ = 255; } - *(*op)++ = (BYTE)length; + for(; length >= 510 ; length -= 510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length -= 255; *op++ = 255; } + *op++ = (BYTE)length; } else { *token += (BYTE)(length); } /* Prepare next loop */ - *ip += matchLength; - *anchor = *ip; + ip += matchLength; + anchor = ip; return 0; } +#undef ip +#undef op +#undef anchor LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( LZ4HC_CCtx_internal* const ctx, @@ -535,7 +552,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( char* const dest, int* srcSizePtr, int const maxOutputSize, - unsigned maxNbAttempts, + int maxNbAttempts, const limitedOutput_directive limit, const dictCtx_directive dict ) @@ -565,7 +582,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_hashChain ( /* init */ *srcSizePtr = 0; if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ - if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ + if (inputSize < LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */ /* Main Loop */ while (ip <= mflimit) { @@ -637,7 +654,11 @@ _Search3: if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, limit, 
oend)) goto _dest_overflow; ip = start2; optr = op; - if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) goto _dest_overflow; + if (LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml2, ref2, limit, oend)) { + ml = ml2; + ref = ref2; + goto _dest_overflow; + } continue; } @@ -709,17 +730,18 @@ _Search3: _last_literals: /* Encode Last Literals */ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + litLength + lastRunSize; + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) return 0; /* Check output limit */ + if (limit == limitedOutput) return 0; /* adapt lastRunSize to fill 'dest' */ - lastRunSize = (size_t)(oend - op) - 1; - litLength = (lastRunSize + 255 - RUN_MASK) / 255; - lastRunSize -= litLength; + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; } - ip = anchor + lastRunSize; + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ if (lastRunSize >= RUN_MASK) { size_t accumulator = lastRunSize - RUN_MASK; @@ -739,9 +761,25 @@ _last_literals: _dest_overflow: if (limit == fillOutput) { + /* Assumption : ip, anchor, ml and ref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing"); op = optr; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - 
(op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ml >= 0); + if ((size_t)ml > maxMlSize) ml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ml >= MFLIMIT) { + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ref, notLimited, oend); + } } goto _last_literals; } + /* compression failed */ return 0; } @@ -752,7 +790,7 @@ static int LZ4HC_compress_optimal( LZ4HC_CCtx_internal* ctx, int const nbSearches, size_t sufficient_len, const limitedOutput_directive limit, int const fullUpdate, const dictCtx_directive dict, - HCfavor_e favorDecSpeed); + const HCfavor_e favorDecSpeed); LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( @@ -769,7 +807,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( typedef enum { lz4hc, lz4opt } lz4hc_strat_e; typedef struct { lz4hc_strat_e strat; - U32 nbSearches; + int nbSearches; U32 targetLength; } cParams_t; static const cParams_t clTable[LZ4HC_CLEVEL_MAX+1] = { @@ -788,7 +826,8 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( { lz4opt,16384,LZ4_OPT_NUM }, /* 12==LZ4HC_CLEVEL_MAX */ }; - DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d)", ctx, src, *srcSizePtr); + DEBUGLOG(4, "LZ4HC_compress_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + ctx, src, *srcSizePtr, limit); if (limit == fillOutput && dstCapacity < 1) return 0; /* Impossible to store anything */ if ((U32)*srcSizePtr > (U32)LZ4_MAX_INPUT_SIZE) return 0; /* Unsupported input size (too large or negative) */ @@ -808,7 +847,7 @@ LZ4_FORCE_INLINE int LZ4HC_compress_generic_internal ( assert(cParam.strat == lz4opt); result = LZ4HC_compress_optimal(ctx, src, dst, srcSizePtr, dstCapacity, - (int)cParam.nbSearches, cParam.targetLength, limit, + cParam.nbSearches, cParam.targetLength, limit, cLevel == LZ4HC_CLEVEL_MAX, /* ultra mode */ dict, favor); } @@ -881,27 +920,22 @@ LZ4HC_compress_generic ( int LZ4_sizeofStateHC(void) { return 
(int)sizeof(LZ4_streamHC_t); } -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - * it reports an aligment of 8-bytes, - * while actually aligning LZ4_streamHC_t on 4 bytes. */ static size_t LZ4_streamHC_t_alignment(void) { - struct { char c; LZ4_streamHC_t t; } t_a; - return sizeof(t_a) - sizeof(t_a.t); -} +#if LZ4_ALIGN_TEST + typedef struct { char c; LZ4_streamHC_t t; } t_a; + return sizeof(t_a) - sizeof(LZ4_streamHC_t); +#else + return 1; /* effectively disabled */ #endif +} /* state is presumed correctly initialized, * in which case its size and alignment have already been validate */ int LZ4_compress_HC_extStateHC_fastReset (void* state, const char* src, char* dst, int srcSize, int dstCapacity, int compressionLevel) { LZ4HC_CCtx_internal* const ctx = &((LZ4_streamHC_t*)state)->internal_donotuse; -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - * it reports an aligment of 8-bytes, - * while actually aligning LZ4_streamHC_t on 4 bytes. 
*/ - assert(((size_t)state & (LZ4_streamHC_t_alignment() - 1)) == 0); /* check alignment */ -#endif - if (((size_t)(state)&(sizeof(void*)-1)) != 0) return 0; /* Error : state is not aligned for pointers (32 or 64 bits) */ + if (!LZ4_isAligned(state, LZ4_streamHC_t_alignment())) return 0; LZ4_resetStreamHC_fast((LZ4_streamHC_t*)state, compressionLevel); LZ4HC_init_internal (ctx, (const BYTE*)src); if (dstCapacity < LZ4_compressBound(srcSize)) @@ -950,10 +984,11 @@ int LZ4_compress_HC_destSize(void* state, const char* source, char* dest, int* s /* allocation */ LZ4_streamHC_t* LZ4_createStreamHC(void) { - LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)ALLOC(sizeof(LZ4_streamHC_t)); - if (LZ4_streamHCPtr==NULL) return NULL; - LZ4_initStreamHC(LZ4_streamHCPtr, sizeof(*LZ4_streamHCPtr)); /* full initialization, malloc'ed buffer can be full of garbage */ - return LZ4_streamHCPtr; + LZ4_streamHC_t* const state = + (LZ4_streamHC_t*)ALLOC_AND_ZERO(sizeof(LZ4_streamHC_t)); + if (state == NULL) return NULL; + LZ4_setCompressionLevel(state, LZ4HC_CLEVEL_DEFAULT); + return state; } int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) @@ -968,22 +1003,16 @@ int LZ4_freeStreamHC (LZ4_streamHC_t* LZ4_streamHCPtr) LZ4_streamHC_t* LZ4_initStreamHC (void* buffer, size_t size) { LZ4_streamHC_t* const LZ4_streamHCPtr = (LZ4_streamHC_t*)buffer; - if (buffer == NULL) return NULL; - if (size < sizeof(LZ4_streamHC_t)) return NULL; -#ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 : - * it reports an aligment of 8-bytes, - * while actually aligning LZ4_streamHC_t on 4 bytes. 
*/ - if (((size_t)buffer) & (LZ4_streamHC_t_alignment() - 1)) return NULL; /* alignment check */ -#endif /* if compilation fails here, LZ4_STREAMHCSIZE must be increased */ LZ4_STATIC_ASSERT(sizeof(LZ4HC_CCtx_internal) <= LZ4_STREAMHCSIZE); - DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", LZ4_streamHCPtr, (unsigned)size); - /* end-base will trigger a clearTable on starting compression */ - LZ4_streamHCPtr->internal_donotuse.end = (const BYTE *)(ptrdiff_t)-1; - LZ4_streamHCPtr->internal_donotuse.base = NULL; - LZ4_streamHCPtr->internal_donotuse.dictCtx = NULL; - LZ4_streamHCPtr->internal_donotuse.favorDecSpeed = 0; - LZ4_streamHCPtr->internal_donotuse.dirty = 0; + DEBUGLOG(4, "LZ4_initStreamHC(%p, %u)", buffer, (unsigned)size); + /* check conditions */ + if (buffer == NULL) return NULL; + if (size < sizeof(LZ4_streamHC_t)) return NULL; + if (!LZ4_isAligned(buffer, LZ4_streamHC_t_alignment())) return NULL; + /* init */ + { LZ4HC_CCtx_internal* const hcstate = &(LZ4_streamHCPtr->internal_donotuse); + MEM_INIT(hcstate, 0, sizeof(*hcstate)); } LZ4_setCompressionLevel(LZ4_streamHCPtr, LZ4HC_CLEVEL_DEFAULT); return LZ4_streamHCPtr; } @@ -1028,7 +1057,7 @@ int LZ4_loadDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, const char* dictionary, int dictSize) { LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(4, "LZ4_loadDictHC(%p, %p, %d)", LZ4_streamHCPtr, dictionary, dictSize); + DEBUGLOG(4, "LZ4_loadDictHC(ctx:%p, dict:%p, dictSize:%d)", LZ4_streamHCPtr, dictionary, dictSize); assert(LZ4_streamHCPtr != NULL); if (dictSize > 64 KB) { dictionary += (size_t)dictSize - 64 KB; @@ -1069,14 +1098,15 @@ static void LZ4HC_setExternalDict(LZ4HC_CCtx_internal* ctxPtr, const BYTE* newBl ctxPtr->dictCtx = NULL; } -static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, - const char* src, char* dst, - int* srcSizePtr, int dstCapacity, - limitedOutput_directive limit) +static int +LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, + 
const char* src, char* dst, + int* srcSizePtr, int dstCapacity, + limitedOutput_directive limit) { LZ4HC_CCtx_internal* const ctxPtr = &LZ4_streamHCPtr->internal_donotuse; - DEBUGLOG(4, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d)", - LZ4_streamHCPtr, src, *srcSizePtr); + DEBUGLOG(5, "LZ4_compressHC_continue_generic(ctx=%p, src=%p, srcSize=%d, limit=%d)", + LZ4_streamHCPtr, src, *srcSizePtr, limit); assert(ctxPtr != NULL); /* auto-init if forgotten */ if (ctxPtr->base == NULL) LZ4HC_init_internal (ctxPtr, (const BYTE*) src); @@ -1100,8 +1130,7 @@ static int LZ4_compressHC_continue_generic (LZ4_streamHC_t* LZ4_streamHCPtr, if (sourceEnd > dictEnd) sourceEnd = dictEnd; ctxPtr->lowLimit = (U32)(sourceEnd - ctxPtr->dictBase); if (ctxPtr->dictLimit - ctxPtr->lowLimit < 4) ctxPtr->lowLimit = ctxPtr->dictLimit; - } - } + } } return LZ4HC_compress_generic (ctxPtr, src, dst, srcSizePtr, dstCapacity, ctxPtr->compressionLevel, limit); } @@ -1121,23 +1150,30 @@ int LZ4_compress_HC_continue_destSize (LZ4_streamHC_t* LZ4_streamHCPtr, const ch -/* dictionary saving */ - +/* LZ4_saveDictHC : + * save history content + * into a user-provided buffer + * which is then used to continue compression + */ int LZ4_saveDictHC (LZ4_streamHC_t* LZ4_streamHCPtr, char* safeBuffer, int dictSize) { LZ4HC_CCtx_internal* const streamPtr = &LZ4_streamHCPtr->internal_donotuse; int const prefixSize = (int)(streamPtr->end - (streamPtr->base + streamPtr->dictLimit)); - DEBUGLOG(4, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); + DEBUGLOG(5, "LZ4_saveDictHC(%p, %p, %d)", LZ4_streamHCPtr, safeBuffer, dictSize); + assert(prefixSize >= 0); if (dictSize > 64 KB) dictSize = 64 KB; if (dictSize < 4) dictSize = 0; if (dictSize > prefixSize) dictSize = prefixSize; - memmove(safeBuffer, streamPtr->end - dictSize, dictSize); + if (safeBuffer == NULL) assert(dictSize == 0); + if (dictSize > 0) + memmove(safeBuffer, streamPtr->end - dictSize, dictSize); { U32 const endIndex = 
(U32)(streamPtr->end - streamPtr->base); streamPtr->end = (const BYTE*)safeBuffer + dictSize; streamPtr->base = streamPtr->end - endIndex; streamPtr->dictLimit = endIndex - (U32)dictSize; streamPtr->lowLimit = endIndex - (U32)dictSize; - if (streamPtr->nextToUpdate < streamPtr->dictLimit) streamPtr->nextToUpdate = streamPtr->dictLimit; + if (streamPtr->nextToUpdate < streamPtr->dictLimit) + streamPtr->nextToUpdate = streamPtr->dictLimit; } return dictSize; } @@ -1287,8 +1323,13 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, const dictCtx_directive dict, const HCfavor_e favorDecSpeed) { + int retval = 0; #define TRAILING_LITERALS 3 +#ifdef LZ4HC_HEAPMODE + LZ4HC_optimal_t* const opt = (LZ4HC_optimal_t*)ALLOC(sizeof(LZ4HC_optimal_t) * (LZ4_OPT_NUM + TRAILING_LITERALS)); +#else LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* ~64 KB, which is a bit large for stack... */ +#endif const BYTE* ip = (const BYTE*) source; const BYTE* anchor = ip; @@ -1298,15 +1339,19 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, BYTE* op = (BYTE*) dst; BYTE* opSaved = (BYTE*) dst; BYTE* oend = op + dstCapacity; + int ovml = MINMATCH; /* overflow - last sequence */ + const BYTE* ovref = NULL; /* init */ +#ifdef LZ4HC_HEAPMODE + if (opt == NULL) goto _return_label; +#endif DEBUGLOG(5, "LZ4HC_compress_optimal(dst=%p, dstCapa=%u)", dst, (unsigned)dstCapacity); *srcSizePtr = 0; if (limit == fillOutput) oend -= LASTLITERALS; /* Hack for support LZ4 format restriction */ if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; /* Main Loop */ - assert(ip - anchor < LZ4_MAX_INPUT_SIZE); while (ip <= mflimit) { int const llen = (int)(ip - anchor); int best_mlen, best_off; @@ -1320,8 +1365,11 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, int const firstML = firstMatch.len; const BYTE* const matchPos = ip - firstMatch.off; opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) /* 
updates ip, op and anchor */ + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), firstML, matchPos, limit, oend) ) { /* updates ip, op and anchor */ + ovml = firstML; + ovref = matchPos; goto _dest_overflow; + } continue; } @@ -1463,7 +1511,7 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, best_off = opt[last_match_pos].off; cur = last_match_pos - best_mlen; - encode: /* cur, last_match_pos, best_mlen, best_off must be set */ +encode: /* cur, last_match_pos, best_mlen, best_off must be set */ assert(cur < LZ4_OPT_NUM); assert(last_match_pos >= 1); /* == 1 when only one candidate */ DEBUGLOG(6, "reverse traversal, looking for shortest path (last_match_pos=%i)", last_match_pos); @@ -1493,25 +1541,31 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, assert(ml >= MINMATCH); assert((offset >= 1) && (offset <= LZ4_DISTANCE_MAX)); opSaved = op; - if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */ + if ( LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ml, ip - offset, limit, oend) ) { /* updates ip, op and anchor */ + ovml = ml; + ovref = ip - offset; goto _dest_overflow; - } } + } } } } /* while (ip <= mflimit) */ - _last_literals: +_last_literals: /* Encode Last Literals */ { size_t lastRunSize = (size_t)(iend - anchor); /* literals */ - size_t litLength = (lastRunSize + 255 - RUN_MASK) / 255; - size_t const totalSize = 1 + litLength + lastRunSize; + size_t llAdd = (lastRunSize + 255 - RUN_MASK) / 255; + size_t const totalSize = 1 + llAdd + lastRunSize; if (limit == fillOutput) oend += LASTLITERALS; /* restore correct value */ if (limit && (op + totalSize > oend)) { - if (limit == limitedOutput) return 0; /* Check output limit */ + if (limit == limitedOutput) { /* Check output limit */ + retval = 0; + goto _return_label; + } /* adapt lastRunSize to fill 'dst' */ - lastRunSize = (size_t)(oend - op) - 1; - litLength = (lastRunSize + 255 - RUN_MASK) / 255; - lastRunSize -= 
litLength; + lastRunSize = (size_t)(oend - op) - 1 /*token*/; + llAdd = (lastRunSize + 256 - RUN_MASK) / 256; + lastRunSize -= llAdd; } - ip = anchor + lastRunSize; + DEBUGLOG(6, "Final literal run : %i literals", (int)lastRunSize); + ip = anchor + lastRunSize; /* can be != iend if limit==fillOutput */ if (lastRunSize >= RUN_MASK) { size_t accumulator = lastRunSize - RUN_MASK; @@ -1527,12 +1581,35 @@ static int LZ4HC_compress_optimal ( LZ4HC_CCtx_internal* ctx, /* End */ *srcSizePtr = (int) (((const char*)ip) - source); - return (int) ((char*)op-dst); + retval = (int) ((char*)op-dst); + goto _return_label; - _dest_overflow: - if (limit == fillOutput) { - op = opSaved; /* restore correct out pointer */ - goto _last_literals; - } - return 0; - } +_dest_overflow: +if (limit == fillOutput) { + /* Assumption : ip, anchor, ovml and ovref must be set correctly */ + size_t const ll = (size_t)(ip - anchor); + size_t const ll_addbytes = (ll + 240) / 255; + size_t const ll_totalCost = 1 + ll_addbytes + ll; + BYTE* const maxLitPos = oend - 3; /* 2 for offset, 1 for token */ + DEBUGLOG(6, "Last sequence overflowing (only %i bytes remaining)", (int)(oend-1-opSaved)); + op = opSaved; /* restore correct out pointer */ + if (op + ll_totalCost <= maxLitPos) { + /* ll validated; now adjust match length */ + size_t const bytesLeftForMl = (size_t)(maxLitPos - (op+ll_totalCost)); + size_t const maxMlSize = MINMATCH + (ML_MASK-1) + (bytesLeftForMl * 255); + assert(maxMlSize < INT_MAX); assert(ovml >= 0); + if ((size_t)ovml > maxMlSize) ovml = (int)maxMlSize; + if ((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1 + ovml >= MFLIMIT) { + DEBUGLOG(6, "Space to end : %i + ml (%i)", (int)((oend + LASTLITERALS) - (op + ll_totalCost + 2) - 1), ovml); + DEBUGLOG(6, "Before : ip = %p, anchor = %p", ip, anchor); + LZ4HC_encodeSequence(UPDATABLE(ip, op, anchor), ovml, ovref, notLimited, oend); + DEBUGLOG(6, "After : ip = %p, anchor = %p", ip, anchor); + } } + goto _last_literals; +} 
+_return_label: +#ifdef LZ4HC_HEAPMODE + FREEMEM(opt); +#endif + return retval; +} diff --git a/lib/lz4hc.h b/lib/lz4hc.h index 44e35bb..3d441fb 100644 --- a/lib/lz4hc.h +++ b/lib/lz4hc.h @@ -198,57 +198,32 @@ LZ4LIB_API int LZ4_saveDictHC (LZ4_streamHC_t* streamHCPtr, char* safeBuffer, in #define LZ4HC_HASH_MASK (LZ4HC_HASHTABLESIZE - 1) -#if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) -#include - typedef struct LZ4HC_CCtx_internal LZ4HC_CCtx_internal; struct LZ4HC_CCtx_internal { - uint32_t hashTable[LZ4HC_HASHTABLESIZE]; - uint16_t chainTable[LZ4HC_MAXD]; - const uint8_t* end; /* next block here to continue on current prefix */ - const uint8_t* base; /* All index relative to this position */ - const uint8_t* dictBase; /* alternate base for extDict */ - uint32_t dictLimit; /* below that point, need extDict */ - uint32_t lowLimit; /* below that point, no more dict */ - uint32_t nextToUpdate; /* index from which to continue dictionary update */ - short compressionLevel; - int8_t favorDecSpeed; /* favor decompression speed if this flag set, - otherwise, favor compression ratio */ - int8_t dirty; /* stream has to be fully reset if this flag is set */ + LZ4_u32 hashTable[LZ4HC_HASHTABLESIZE]; + LZ4_u16 chainTable[LZ4HC_MAXD]; + const LZ4_byte* end; /* next block here to continue on current prefix */ + const LZ4_byte* base; /* All index relative to this position */ + const LZ4_byte* dictBase; /* alternate base for extDict */ + LZ4_u32 dictLimit; /* below that point, need extDict */ + LZ4_u32 lowLimit; /* below that point, no more dict */ + LZ4_u32 nextToUpdate; /* index from which to continue dictionary update */ + short compressionLevel; + LZ4_i8 favorDecSpeed; /* favor decompression speed if this flag set, + otherwise, favor compression ratio */ + LZ4_i8 dirty; /* stream has to be fully reset if this flag is set */ const LZ4HC_CCtx_internal* dictCtx; }; -#else - -typedef struct LZ4HC_CCtx_internal 
LZ4HC_CCtx_internal; -struct LZ4HC_CCtx_internal -{ - unsigned int hashTable[LZ4HC_HASHTABLESIZE]; - unsigned short chainTable[LZ4HC_MAXD]; - const unsigned char* end; /* next block here to continue on current prefix */ - const unsigned char* base; /* All index relative to this position */ - const unsigned char* dictBase; /* alternate base for extDict */ - unsigned int dictLimit; /* below that point, need extDict */ - unsigned int lowLimit; /* below that point, no more dict */ - unsigned int nextToUpdate; /* index from which to continue dictionary update */ - short compressionLevel; - char favorDecSpeed; /* favor decompression speed if this flag set, - otherwise, favor compression ratio */ - char dirty; /* stream has to be fully reset if this flag is set */ - const LZ4HC_CCtx_internal* dictCtx; -}; - -#endif - /* Do not use these definitions directly ! * Declare or allocate an LZ4_streamHC_t instead. */ -#define LZ4_STREAMHCSIZE (4*LZ4HC_HASHTABLESIZE + 2*LZ4HC_MAXD + 56 + ((sizeof(void*)==16) ? 
56 : 0) /* AS400*/ ) /* 262200 or 262256*/ -#define LZ4_STREAMHCSIZE_SIZET (LZ4_STREAMHCSIZE / sizeof(size_t)) +#define LZ4_STREAMHCSIZE 262200 /* static size, for inter-version compatibility */ +#define LZ4_STREAMHCSIZE_VOIDP (LZ4_STREAMHCSIZE / sizeof(void*)) union LZ4_streamHC_u { - size_t table[LZ4_STREAMHCSIZE_SIZET]; + void* table[LZ4_STREAMHCSIZE_VOIDP]; LZ4HC_CCtx_internal internal_donotuse; }; /* previously typedef'd to LZ4_streamHC_t */ diff --git a/ossfuzz/Makefile b/ossfuzz/Makefile index 6875eb6..2ec1675 100644 --- a/ossfuzz/Makefile +++ b/ossfuzz/Makefile @@ -26,7 +26,7 @@ # ########################################################################## LZ4DIR := ../lib -LIB_FUZZING_ENGINE ?= standaloneengine.o +LIB_FUZZING_ENGINE ?= DEBUGLEVEL?= 1 DEBUGFLAGS = -g -DLZ4_DEBUG=$(DEBUGLEVEL) @@ -47,6 +47,7 @@ FUZZERS := \ round_trip_frame_fuzzer \ decompress_frame_fuzzer +.PHONY: all all: $(FUZZERS) # Include a rule to build the static library if calling this target @@ -58,17 +59,20 @@ $(LZ4DIR)/liblz4.a: $(CC) -c $(LZ4_CFLAGS) $(LZ4_CPPFLAGS) $< -o $@ # Generic rule for generating fuzzers -%_fuzzer: %_fuzzer.o lz4_helpers.o $(LZ4DIR)/liblz4.a - # Compile the standalone code just in case. The OSS-Fuzz code might - # override the LIB_FUZZING_ENGINE value to "-fsanitize=fuzzer" - $(CC) -c $(LZ4_CFLAGS) $(LZ4_CPPFLAGS) standaloneengine.c -o standaloneengine.o - - # Now compile the actual fuzzer. 
+ifeq ($(LIB_FUZZING_ENGINE),) + LIB_FUZZING_DEPS := standaloneengine.o +else + LIB_FUZZING_DEPS := +endif +%_fuzzer: %_fuzzer.o lz4_helpers.o fuzz_data_producer.o $(LZ4DIR)/liblz4.a $(LIB_FUZZING_DEPS) $(CXX) $(LZ4_CXXFLAGS) $(LZ4_CPPFLAGS) $(LDFLAGS) $(LIB_FUZZING_ENGINE) $^ -o $@$(EXT) %_fuzzer_clean: $(RM) $*_fuzzer $*_fuzzer.o standaloneengine.o .PHONY: clean -clean: compress_fuzzer_clean decompress_fuzzer_clean +clean: compress_fuzzer_clean decompress_fuzzer_clean \ + compress_frame_fuzzer_clean compress_hc_fuzzer_clean \ + decompress_frame_fuzzer_clean round_trip_frame_fuzzer_clean \ + round_trip_fuzzer_clean round_trip_hc_fuzzer_clean round_trip_stream_fuzzer_clean $(MAKE) -C $(LZ4DIR) clean diff --git a/ossfuzz/compress_frame_fuzzer.c b/ossfuzz/compress_frame_fuzzer.c index 75c609f..568ae14 100644 --- a/ossfuzz/compress_frame_fuzzer.c +++ b/ossfuzz/compress_frame_fuzzer.c @@ -13,18 +13,23 @@ #include "lz4.h" #include "lz4frame.h" #include "lz4_helpers.h" +#include "fuzz_data_producer.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - uint32_t seed = FUZZ_seed(&data, &size); - LZ4F_preferences_t const prefs = FUZZ_randomPreferences(&seed); + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + LZ4F_preferences_t const prefs = FUZZ_dataProducer_preferences(producer); + size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer); + size = FUZZ_dataProducer_remainingBytes(producer); + size_t const compressBound = LZ4F_compressFrameBound(size, &prefs); - size_t const dstCapacity = FUZZ_rand32(&seed, 0, compressBound); + size_t const dstCapacity = FUZZ_getRange_from_uint32(dstCapacitySeed, 0, compressBound); + char* const dst = (char*)malloc(dstCapacity); char* const rt = (char*)malloc(size); - FUZZ_ASSERT(dst); - FUZZ_ASSERT(rt); + FUZZ_ASSERT(dst!=NULL); + FUZZ_ASSERT(rt!=NULL); /* If compression succeeds it must round trip correctly. 
*/ size_t const dstSize = @@ -37,6 +42,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) free(dst); free(rt); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/compress_fuzzer.c b/ossfuzz/compress_fuzzer.c index 7021624..edc8aad 100644 --- a/ossfuzz/compress_fuzzer.c +++ b/ossfuzz/compress_fuzzer.c @@ -10,12 +10,18 @@ #include #include "fuzz_helpers.h" +#include "fuzz_data_producer.h" #include "lz4.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - uint32_t seed = FUZZ_seed(&data, &size); - size_t const dstCapacity = FUZZ_rand32(&seed, 0, LZ4_compressBound(size)); + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer); + size = FUZZ_dataProducer_remainingBytes(producer); + + size_t const compressBound = LZ4_compressBound(size); + size_t const dstCapacity = FUZZ_getRange_from_uint32(dstCapacitySeed, 0, compressBound); + char* const dst = (char*)malloc(dstCapacity); char* const rt = (char*)malloc(size); @@ -46,6 +52,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) free(dst); free(rt); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/compress_hc_fuzzer.c b/ossfuzz/compress_hc_fuzzer.c index 4841367..7d8e45a 100644 --- a/ossfuzz/compress_hc_fuzzer.c +++ b/ossfuzz/compress_hc_fuzzer.c @@ -10,16 +10,22 @@ #include #include "fuzz_helpers.h" +#include "fuzz_data_producer.h" #include "lz4.h" #include "lz4hc.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - uint32_t seed = FUZZ_seed(&data, &size); - size_t const dstCapacity = FUZZ_rand32(&seed, 0, LZ4_compressBound(size)); + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer); + size_t const levelSeed = FUZZ_dataProducer_retrieve32(producer); + size = FUZZ_dataProducer_remainingBytes(producer); + + size_t const dstCapacity = 
FUZZ_getRange_from_uint32(dstCapacitySeed, 0, size); + int const level = FUZZ_getRange_from_uint32(levelSeed, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX); + char* const dst = (char*)malloc(dstCapacity); char* const rt = (char*)malloc(size); - int const level = FUZZ_rand32(&seed, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX); FUZZ_ASSERT(dst); FUZZ_ASSERT(rt); @@ -52,6 +58,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) free(dst); free(rt); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/decompress_frame_fuzzer.c b/ossfuzz/decompress_frame_fuzzer.c index bda25b0..0fcbb16 100644 --- a/ossfuzz/decompress_frame_fuzzer.c +++ b/ossfuzz/decompress_frame_fuzzer.c @@ -9,6 +9,7 @@ #include #include "fuzz_helpers.h" +#include "fuzz_data_producer.h" #include "lz4.h" #define LZ4F_STATIC_LINKING_ONLY #include "lz4frame.h" @@ -29,11 +30,17 @@ static void decompress(LZ4F_dctx* dctx, void* dst, size_t dstCapacity, int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer); + size_t const dictSizeSeed = FUZZ_dataProducer_retrieve32(producer); + size = FUZZ_dataProducer_remainingBytes(producer); - uint32_t seed = FUZZ_seed(&data, &size); - size_t const dstCapacity = FUZZ_rand32(&seed, 0, 4 * size); + size_t const dstCapacity = FUZZ_getRange_from_uint32( + dstCapacitySeed, 0, 4 * size); size_t const largeDictSize = 64 * 1024; - size_t const dictSize = FUZZ_rand32(&seed, 0, largeDictSize); + size_t const dictSize = FUZZ_getRange_from_uint32( + dictSizeSeed, 0, largeDictSize); + char* const dst = (char*)malloc(dstCapacity); char* const dict = (char*)malloc(dictSize); LZ4F_decompressOptions_t opts; @@ -62,6 +69,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) LZ4F_freeDecompressionContext(dctx); free(dst); free(dict); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/decompress_fuzzer.c 
b/ossfuzz/decompress_fuzzer.c index 0267c93..6f48e30 100644 --- a/ossfuzz/decompress_fuzzer.c +++ b/ossfuzz/decompress_fuzzer.c @@ -9,13 +9,16 @@ #include #include "fuzz_helpers.h" +#include "fuzz_data_producer.h" #include "lz4.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + size_t const dstCapacitySeed = FUZZ_dataProducer_retrieve32(producer); + size = FUZZ_dataProducer_remainingBytes(producer); - uint32_t seed = FUZZ_seed(&data, &size); - size_t const dstCapacity = FUZZ_rand32(&seed, 0, 4 * size); + size_t const dstCapacity = FUZZ_getRange_from_uint32(dstCapacitySeed, 0, 4 * size); size_t const smallDictSize = size + 1; size_t const largeDictSize = 64 * 1024 - 1; size_t const dictSize = MAX(smallDictSize, largeDictSize); @@ -53,6 +56,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) dstCapacity, dstCapacity); free(dst); free(dict); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/fuzz_data_producer.c b/ossfuzz/fuzz_data_producer.c new file mode 100644 index 0000000..670fbf5 --- /dev/null +++ b/ossfuzz/fuzz_data_producer.c @@ -0,0 +1,77 @@ +#include "fuzz_data_producer.h" + +struct FUZZ_dataProducer_s{ + const uint8_t *data; + size_t size; +}; + +FUZZ_dataProducer_t* FUZZ_dataProducer_create(const uint8_t* data, size_t size) { + FUZZ_dataProducer_t* const producer = malloc(sizeof(FUZZ_dataProducer_t)); + + FUZZ_ASSERT(producer != NULL); + + producer->data = data; + producer->size = size; + return producer; +} + +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer) { free(producer); } + +uint32_t FUZZ_dataProducer_retrieve32(FUZZ_dataProducer_t *producer) { + const uint8_t* data = producer->data; + const size_t size = producer->size; + if (size == 0) { + return 0; + } else if (size < 4) { + producer->size -= 1; + return (uint32_t)data[size - 1]; + } else { + producer->size -= 4; + return *(data + size - 4); + } +} + +uint32_t 
FUZZ_getRange_from_uint32(uint32_t seed, uint32_t min, uint32_t max) +{ + uint32_t range = max - min; + if (range == 0xffffffff) { + return seed; + } + return min + seed % (range + 1); +} + +uint32_t FUZZ_dataProducer_range32(FUZZ_dataProducer_t* producer, + uint32_t min, uint32_t max) +{ + size_t const seed = FUZZ_dataProducer_retrieve32(producer); + return FUZZ_getRange_from_uint32(seed, min, max); +} + +LZ4F_frameInfo_t FUZZ_dataProducer_frameInfo(FUZZ_dataProducer_t* producer) +{ + LZ4F_frameInfo_t info = LZ4F_INIT_FRAMEINFO; + info.blockSizeID = FUZZ_dataProducer_range32(producer, LZ4F_max64KB - 1, LZ4F_max4MB); + if (info.blockSizeID < LZ4F_max64KB) { + info.blockSizeID = LZ4F_default; + } + info.blockMode = FUZZ_dataProducer_range32(producer, LZ4F_blockLinked, LZ4F_blockIndependent); + info.contentChecksumFlag = FUZZ_dataProducer_range32(producer, LZ4F_noContentChecksum, + LZ4F_contentChecksumEnabled); + info.blockChecksumFlag = FUZZ_dataProducer_range32(producer, LZ4F_noBlockChecksum, + LZ4F_blockChecksumEnabled); + return info; +} + +LZ4F_preferences_t FUZZ_dataProducer_preferences(FUZZ_dataProducer_t* producer) +{ + LZ4F_preferences_t prefs = LZ4F_INIT_PREFERENCES; + prefs.frameInfo = FUZZ_dataProducer_frameInfo(producer); + prefs.compressionLevel = FUZZ_dataProducer_range32(producer, 0, LZ4HC_CLEVEL_MAX + 3) - 3; + prefs.autoFlush = FUZZ_dataProducer_range32(producer, 0, 1); + prefs.favorDecSpeed = FUZZ_dataProducer_range32(producer, 0, 1); + return prefs; +} + +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer){ + return producer->size; +} diff --git a/ossfuzz/fuzz_data_producer.h b/ossfuzz/fuzz_data_producer.h new file mode 100644 index 0000000..b96dcba --- /dev/null +++ b/ossfuzz/fuzz_data_producer.h @@ -0,0 +1,36 @@ +#include +#include +#include +#include + +#include "fuzz_helpers.h" +#include "lz4frame.h" +#include "lz4hc.h" + +/* Struct used for maintaining the state of the data */ +typedef struct FUZZ_dataProducer_s 
FUZZ_dataProducer_t; + +/* Returns a data producer state struct. Use for producer initialization. */ +FUZZ_dataProducer_t *FUZZ_dataProducer_create(const uint8_t *data, size_t size); + +/* Frees the data producer */ +void FUZZ_dataProducer_free(FUZZ_dataProducer_t *producer); + +/* Returns 32 bits from the end of data */ +uint32_t FUZZ_dataProducer_retrieve32(FUZZ_dataProducer_t *producer); + +/* Returns value between [min, max] */ +uint32_t FUZZ_getRange_from_uint32(uint32_t seed, uint32_t min, uint32_t max); + +/* Combination of above two functions for non adaptive use cases. ie where size is not involved */ +uint32_t FUZZ_dataProducer_range32(FUZZ_dataProducer_t *producer, uint32_t min, + uint32_t max); + +/* Returns lz4 preferences */ +LZ4F_preferences_t FUZZ_dataProducer_preferences(FUZZ_dataProducer_t* producer); + +/* Returns lz4 frame info */ +LZ4F_frameInfo_t FUZZ_dataProducer_frameInfo(FUZZ_dataProducer_t* producer); + +/* Returns the size of the remaining bytes of data in the producer */ +size_t FUZZ_dataProducer_remainingBytes(FUZZ_dataProducer_t *producer); diff --git a/ossfuzz/round_trip_frame_fuzzer.c b/ossfuzz/round_trip_frame_fuzzer.c index 1eea90c..149542d 100644 --- a/ossfuzz/round_trip_frame_fuzzer.c +++ b/ossfuzz/round_trip_frame_fuzzer.c @@ -12,14 +12,17 @@ #include "lz4.h" #include "lz4frame.h" #include "lz4_helpers.h" +#include "fuzz_data_producer.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - uint32_t seed = FUZZ_seed(&data, &size); - LZ4F_preferences_t const prefs = FUZZ_randomPreferences(&seed); - size_t const dstCapacity = LZ4F_compressFrameBound(size, &prefs); + FUZZ_dataProducer_t* producer = FUZZ_dataProducer_create(data, size); + LZ4F_preferences_t const prefs = FUZZ_dataProducer_preferences(producer); + size = FUZZ_dataProducer_remainingBytes(producer); + + size_t const dstCapacity = LZ4F_compressFrameBound(LZ4_compressBound(size), &prefs); char* const dst = (char*)malloc(dstCapacity); - char* const rt = 
(char*)malloc(size); + char* const rt = (char*)malloc(FUZZ_dataProducer_remainingBytes(producer)); FUZZ_ASSERT(dst); FUZZ_ASSERT(rt); @@ -34,6 +37,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) free(dst); free(rt); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/round_trip_fuzzer.c b/ossfuzz/round_trip_fuzzer.c index 3a66e80..6307058 100644 --- a/ossfuzz/round_trip_fuzzer.c +++ b/ossfuzz/round_trip_fuzzer.c @@ -10,11 +10,17 @@ #include "fuzz_helpers.h" #include "lz4.h" +#include "fuzz_data_producer.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - uint32_t seed = FUZZ_seed(&data, &size); + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + size_t const partialCapacitySeed = FUZZ_dataProducer_retrieve32(producer); + size = FUZZ_dataProducer_remainingBytes(producer); + + size_t const partialCapacity = FUZZ_getRange_from_uint32(partialCapacitySeed, 0, size); size_t const dstCapacity = LZ4_compressBound(size); + char* const dst = (char*)malloc(dstCapacity); char* const rt = (char*)malloc(size); @@ -32,7 +38,6 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) /* Partial decompression must succeed. 
*/ { - size_t const partialCapacity = FUZZ_rand32(&seed, 0, size); char* const partial = (char*)malloc(partialCapacity); FUZZ_ASSERT(partial); int const partialSize = LZ4_decompress_safe_partial( @@ -43,8 +48,10 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) free(partial); } + free(dst); free(rt); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/round_trip_hc_fuzzer.c b/ossfuzz/round_trip_hc_fuzzer.c index 325cdf0..7d03ee2 100644 --- a/ossfuzz/round_trip_hc_fuzzer.c +++ b/ossfuzz/round_trip_hc_fuzzer.c @@ -9,16 +9,20 @@ #include #include "fuzz_helpers.h" +#include "fuzz_data_producer.h" #include "lz4.h" #include "lz4hc.h" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - uint32_t seed = FUZZ_seed(&data, &size); + FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(data, size); + int const level = FUZZ_dataProducer_range32(producer, + LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX); + size = FUZZ_dataProducer_remainingBytes(producer); + size_t const dstCapacity = LZ4_compressBound(size); char* const dst = (char*)malloc(dstCapacity); char* const rt = (char*)malloc(size); - int const level = FUZZ_rand32(&seed, LZ4HC_CLEVEL_MIN, LZ4HC_CLEVEL_MAX); FUZZ_ASSERT(dst); FUZZ_ASSERT(rt); @@ -34,6 +38,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) free(dst); free(rt); + FUZZ_dataProducer_free(producer); return 0; } diff --git a/ossfuzz/travisoss.sh b/ossfuzz/travisoss.sh index 5ea884c..eae9a80 100755 --- a/ossfuzz/travisoss.sh +++ b/ossfuzz/travisoss.sh @@ -12,7 +12,12 @@ then fi # Modify the oss-fuzz Dockerfile so that we're checking out the current branch on travis. 
-sed -i "s@https://github.com/lz4/lz4.git@-b $TRAVIS_BRANCH https://github.com/lz4/lz4.git@" /tmp/ossfuzz/projects/lz4/Dockerfile +if [ "x${TRAVIS_PULL_REQUEST}" = "xfalse" ] +then + sed -i "s@https://github.com/lz4/lz4.git@-b ${TRAVIS_BRANCH} https://github.com/lz4/lz4.git@" /tmp/ossfuzz/projects/lz4/Dockerfile +else + sed -i "s@https://github.com/lz4/lz4.git@-b ${TRAVIS_PULL_REQUEST_BRANCH} https://github.com/${TRAVIS_PULL_REQUEST_SLUG}.git@" /tmp/ossfuzz/projects/lz4/Dockerfile +fi # Try and build the fuzzers pushd /tmp/ossfuzz diff --git a/programs/.gitignore b/programs/.gitignore new file mode 100644 index 0000000..9ffadd9 --- /dev/null +++ b/programs/.gitignore @@ -0,0 +1,21 @@ +# local binary (Makefile) +lz4 +unlz4 +lz4cat +lz4c +lz4c32 +lz4-wlib +datagen +frametest +frametest32 +fullbench +fullbench32 +fuzzer +fuzzer32 +*.exe + +# tests files +tmp* + +# artefacts +*.dSYM diff --git a/programs/COPYING b/programs/COPYING new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/programs/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/programs/Makefile b/programs/Makefile new file mode 100644 index 0000000..c1053f6 --- /dev/null +++ b/programs/Makefile @@ -0,0 +1,187 @@ +# ########################################################################## +# LZ4 programs - Makefile +# Copyright (C) Yann Collet 2011-2017 +# +# This Makefile is validated for Linux, macOS, *BSD, Hurd, Solaris, MSYS2 targets +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+# +# You can contact the author at : +# - LZ4 homepage : http://www.lz4.org +# - LZ4 source repository : https://github.com/lz4/lz4 +# ########################################################################## +# lz4 : Command Line Utility, supporting gzip-like arguments +# lz4c : CLU, supporting also legacy lz4demo arguments +# lz4c32: Same as lz4c, but forced to compile in 32-bits mode +# ########################################################################## + +# Version numbers +LZ4DIR := ../lib +LIBVER_SRC := $(LZ4DIR)/lz4.h +LIBVER_MAJOR_SCRIPT:=`sed -n '/define LZ4_VERSION_MAJOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` +LIBVER_MINOR_SCRIPT:=`sed -n '/define LZ4_VERSION_MINOR/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` +LIBVER_PATCH_SCRIPT:=`sed -n '/define LZ4_VERSION_RELEASE/s/.*[[:blank:]]\([0-9][0-9]*\).*/\1/p' < $(LIBVER_SRC)` +LIBVER_SCRIPT:= $(LIBVER_MAJOR_SCRIPT).$(LIBVER_MINOR_SCRIPT).$(LIBVER_PATCH_SCRIPT) +LIBVER_MAJOR := $(shell echo $(LIBVER_MAJOR_SCRIPT)) +LIBVER_MINOR := $(shell echo $(LIBVER_MINOR_SCRIPT)) +LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT)) +LIBVER := $(shell echo $(LIBVER_SCRIPT)) + +LIBFILES = $(wildcard $(LZ4DIR)/*.c) +SRCFILES = $(sort $(LIBFILES) $(wildcard *.c)) +OBJFILES = $(SRCFILES:.c=.o) + +CPPFLAGS += -I$(LZ4DIR) -DXXH_NAMESPACE=LZ4_ +CFLAGS ?= -O3 +DEBUGFLAGS= -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \ + -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \ + -Wpointer-arith -Wstrict-aliasing=1 +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) + +LZ4_VERSION=$(LIBVER) +MD2ROFF = ronn +MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="lz4 $(LZ4_VERSION)" + +include ../Makefile.inc + +default: lz4-release + +all: lz4 lz4c + +all32: CFLAGS+=-m32 +all32: all + +ifeq ($(WINBASED),yes) +lz4-exe.rc: lz4-exe.rc.in + @echo creating executable resource + $(Q)sed -e 's|@PROGNAME@|lz4|' \ + -e 
's|@LIBVER_MAJOR@|$(LIBVER_MAJOR)|g' \ + -e 's|@LIBVER_MINOR@|$(LIBVER_MINOR)|g' \ + -e 's|@LIBVER_PATCH@|$(LIBVER_PATCH)|g' \ + -e 's|@EXT@|$(EXT)|g' \ + $< >$@ + +lz4-exe.o: lz4-exe.rc + $(WINDRES) -i lz4-exe.rc -o lz4-exe.o + +lz4: $(OBJFILES) lz4-exe.o + $(CC) $(FLAGS) $^ -o $@$(EXT) +else +lz4: $(OBJFILES) + $(CC) $(FLAGS) $(OBJFILES) -o $@$(EXT) $(LDLIBS) +endif + +.PHONY: default all all32 lz4-release man clean-man preview-man clean install uninstall # command targets, never real files: always run even if a same-named file/dir exists +lz4-release: DEBUGFLAGS= +lz4-release: lz4 + +lz4-wlib: LIBFILES = +lz4-wlib: SRCFILES+= $(LZ4DIR)/xxhash.c # benchmark unit needs XXH64() +lz4-wlib: LDFLAGS += -L $(LZ4DIR) +lz4-wlib: LDLIBS = -llz4 +lz4-wlib: liblz4 $(OBJFILES) + @echo WARNING: $@ must link to an extended variant of the dynamic library which also exposes unstable symbols + $(CC) $(FLAGS) $(OBJFILES) -o $@$(EXT) $(LDLIBS) + +.PHONY:liblz4 +liblz4: + CPPFLAGS="-DLZ4F_PUBLISH_STATIC_FUNCTIONS -DLZ4_PUBLISH_STATIC_FUNCTIONS" $(MAKE) -C $(LZ4DIR) liblz4 + +lz4c: lz4 + $(LN_SF) lz4$(EXT) lz4c$(EXT) + +lz4c32: CFLAGS += -m32 +lz4c32 : $(SRCFILES) + $(CC) $(FLAGS) $^ -o $@$(EXT) + +lz4.1: lz4.1.md $(LIBVER_SRC) + cat $< | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@ + +man: lz4.1 + +clean-man: + $(RM) lz4.1 + +preview-man: clean-man man + man ./lz4.1 + +clean: +ifeq ($(WINBASED),yes) + $(Q)$(RM) *.rc +endif + @$(MAKE) -C $(LZ4DIR) $@ > $(VOID) + @$(RM) core *.o *.test tmp* \ + lz4$(EXT) lz4c$(EXT) lz4c32$(EXT) lz4-wlib$(EXT) \ + unlz4$(EXT) lz4cat$(EXT) + @echo Cleaning completed + + +#----------------------------------------------------------------------------- +# make install is validated only for Linux, OSX, BSD, Hurd and Solaris targets +#----------------------------------------------------------------------------- +ifeq ($(POSIX_ENV),Yes) + +unlz4: lz4 + $(LN_SF) lz4$(EXT) unlz4$(EXT) + +lz4cat: lz4 + $(LN_SF) lz4$(EXT) lz4cat$(EXT) + +DESTDIR ?= +# directory variables : GNU conventions prefer lowercase +# see https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html +# support both 
lower and uppercase (BSD), use lowercase in script +PREFIX ?= /usr/local +prefix ?= $(PREFIX) +EXEC_PREFIX ?= $(prefix) +exec_prefix ?= $(EXEC_PREFIX) +BINDIR ?= $(exec_prefix)/bin +bindir ?= $(BINDIR) +DATAROOTDIR ?= $(prefix)/share +datarootdir ?= $(DATAROOTDIR) +MANDIR ?= $(datarootdir)/man +mandir ?= $(MANDIR) +MAN1DIR ?= $(mandir)/man1 +man1dir ?= $(MAN1DIR) + +install: lz4 + @echo Installing binaries + @$(INSTALL_DIR) $(DESTDIR)$(bindir)/ $(DESTDIR)$(man1dir)/ + @$(INSTALL_PROGRAM) lz4$(EXT) $(DESTDIR)$(bindir)/lz4$(EXT) + @$(LN_S) lz4$(EXT) $(DESTDIR)$(bindir)/lz4c$(EXT) + @$(LN_S) lz4$(EXT) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @$(LN_S) lz4$(EXT) $(DESTDIR)$(bindir)/unlz4$(EXT) + @echo Installing man pages + @$(INSTALL_DATA) lz4.1 $(DESTDIR)$(man1dir)/lz4.1 + @$(LN_SF) lz4.1 $(DESTDIR)$(man1dir)/lz4c.1 + @$(LN_SF) lz4.1 $(DESTDIR)$(man1dir)/lz4cat.1 + @$(LN_SF) lz4.1 $(DESTDIR)$(man1dir)/unlz4.1 + @echo lz4 installation completed + +uninstall: + @$(RM) $(DESTDIR)$(bindir)/lz4cat$(EXT) + @$(RM) $(DESTDIR)$(bindir)/unlz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4$(EXT) + @$(RM) $(DESTDIR)$(bindir)/lz4c$(EXT) + @$(RM) $(DESTDIR)$(man1dir)/lz4.1 + @$(RM) $(DESTDIR)$(man1dir)/lz4c.1 + @$(RM) $(DESTDIR)$(man1dir)/lz4cat.1 + @$(RM) $(DESTDIR)$(man1dir)/unlz4.1 + @echo lz4 programs successfully uninstalled + +endif diff --git a/programs/README.md b/programs/README.md new file mode 100644 index 0000000..c1995af --- /dev/null +++ b/programs/README.md @@ -0,0 +1,84 @@ +Command Line Interface for LZ4 library +============================================ + +### Build +The Command Line Interface (CLI) can be generated +using the `make` command without any additional parameters. + +The `Makefile` script supports all [standard conventions](https://www.gnu.org/prep/standards/html_node/Makefile-Conventions.html), +including standard targets (`all`, `install`, `clean`, etc.) +and standard variables (`CC`, `CFLAGS`, `CPPFLAGS`, etc.). 
+ +For advanced use cases, there are targets for different variations of the CLI: +- `lz4` : default CLI, with a command line syntax close to gzip +- `lz4c` : Same as `lz4` with additional support for legacy lz4 commands (incompatible with gzip) +- `lz4c32` : Same as `lz4c`, but forced to compile in 32-bit mode + +The CLI generates and decodes [LZ4-compressed frames](../doc/lz4_Frame_format.md). + + +#### Aggregation of parameters +The CLI supports aggregation of parameters, i.e. `-b1`, `-e18`, and `-i1` can be joined into `-b1e18i1`. + + +#### Benchmark in Command Line Interface +The CLI includes an in-memory compression benchmark module for lz4. +The benchmark is conducted using a given filename. +The file is read into memory. +This makes the benchmark more precise, as it eliminates I/O overhead. + +The benchmark measures ratio, compressed size, compression and decompression speed. +One can select compression levels starting from `-b` and ending with `-e`. +The `-i` parameter selects the number of seconds used for each of the tested levels. 
+ + + +#### Usage of Command Line Interface +The full list of commands can be obtained with the `-h` or `-H` parameter: +``` +Usage : + lz4 [arg] [input] [output] + +input : a filename + with no FILE, or when FILE is - or stdin, read standard input +Arguments : + -1 : Fast compression (default) + -9 : High compression + -d : decompression (default for .lz4 extension) + -z : force compression + -D FILE: use FILE as dictionary + -f : overwrite output without prompting + -k : preserve source files(s) (default) +--rm : remove source file(s) after successful de/compression + -h/-H : display help/long help and exit + +Advanced arguments : + -V : display Version number and exit + -v : verbose mode + -q : suppress warnings; specify twice to suppress errors too + -c : force write to standard output, even if it is the console + -t : test compressed file integrity + -m : multiple input files (implies automatic output filenames) + -r : operate recursively on directories (sets also -m) + -l : compress using Legacy format (Linux kernel compression) + -B# : cut file into blocks of size # bytes [32+] + or predefined block size [4-7] (default: 7) + -BD : Block dependency (improve compression ratio) + -BX : enable block checksum (default:disabled) +--no-frame-crc : disable stream checksum (default:enabled) +--content-size : compressed frame includes original size (default:not present) +--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout) +--favor-decSpeed: compressed files decompress faster, but are less compressed +--fast[=#]: switch to ultra fast compression level (default: 1) + +Benchmark arguments : + -b# : benchmark file(s), using # compression level (default : 1) + -e# : test all compression levels from -bX to # (default : 1) + -i# : minimum evaluation time in seconds (default : 3s) +``` + +#### License + +All files in this directory are licensed under GPL-v2. +See [COPYING](COPYING) for details. 
+The text of the license is also included at the top of each source file. diff --git a/programs/bench.c b/programs/bench.c new file mode 100644 index 0000000..3357d14 --- /dev/null +++ b/programs/bench.c @@ -0,0 +1,746 @@ +/* + bench.c - Demo program to benchmark open-source compression algorithms + Copyright (C) Yann Collet 2012-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + + +/*-************************************ +* Compiler options +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif + + +/* ************************************* +* Includes +***************************************/ +#include "platform.h" /* Compiler options */ +#include "util.h" /* UTIL_GetFileSize, UTIL_sleep */ +#include /* malloc, free */ +#include /* memset */ +#include /* fprintf, fopen, ftello */ +#include /* clock_t, clock, CLOCKS_PER_SEC */ +#include /* assert */ + +#include "datagen.h" /* RDG_genBuffer */ +#include "xxhash.h" +#include "bench.h" + +#define LZ4_STATIC_LINKING_ONLY +#include "lz4.h" +#define LZ4_HC_STATIC_LINKING_ONLY +#include "lz4hc.h" + + +/* ************************************* +* Compression parameters and functions +***************************************/ + +struct compressionParameters +{ + int cLevel; + const char* dictBuf; + int dictSize; + + LZ4_stream_t* LZ4_stream; + LZ4_stream_t* LZ4_dictStream; + LZ4_streamHC_t* LZ4_streamHC; + LZ4_streamHC_t* LZ4_dictStreamHC; + + void (*initFunction)( + struct compressionParameters* pThis); + void (*resetFunction)( + const struct compressionParameters* pThis); + int (*blockFunction)( + const struct compressionParameters* pThis, + const char* src, char* dst, int srcSize, int dstSize); + void (*cleanupFunction)( + const struct compressionParameters* pThis); +}; + +static void LZ4_compressInitNoStream( + struct compressionParameters* pThis) +{ + pThis->LZ4_stream = NULL; + pThis->LZ4_dictStream = NULL; + pThis->LZ4_streamHC = NULL; + pThis->LZ4_dictStreamHC = NULL; +} + +static void LZ4_compressInitStream( + struct compressionParameters* pThis) +{ + pThis->LZ4_stream = LZ4_createStream(); + pThis->LZ4_dictStream = LZ4_createStream(); + 
pThis->LZ4_streamHC = NULL; + pThis->LZ4_dictStreamHC = NULL; + LZ4_loadDict(pThis->LZ4_dictStream, pThis->dictBuf, pThis->dictSize); +} + +static void LZ4_compressInitStreamHC( + struct compressionParameters* pThis) +{ + pThis->LZ4_stream = NULL; + pThis->LZ4_dictStream = NULL; + pThis->LZ4_streamHC = LZ4_createStreamHC(); + pThis->LZ4_dictStreamHC = LZ4_createStreamHC(); + LZ4_loadDictHC(pThis->LZ4_dictStreamHC, pThis->dictBuf, pThis->dictSize); +} + +static void LZ4_compressResetNoStream( + const struct compressionParameters* pThis) +{ + (void)pThis; +} + +static void LZ4_compressResetStream( + const struct compressionParameters* pThis) +{ + LZ4_resetStream_fast(pThis->LZ4_stream); + LZ4_attach_dictionary(pThis->LZ4_stream, pThis->LZ4_dictStream); +} + +static void LZ4_compressResetStreamHC( + const struct compressionParameters* pThis) +{ + LZ4_resetStreamHC_fast(pThis->LZ4_streamHC, pThis->cLevel); + LZ4_attach_HC_dictionary(pThis->LZ4_streamHC, pThis->LZ4_dictStreamHC); +} + +static int LZ4_compressBlockNoStream( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1; + return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration); +} + +static int LZ4_compressBlockNoStreamHC( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + return LZ4_compress_HC(src, dst, srcSize, dstSize, pThis->cLevel); +} + +static int LZ4_compressBlockStream( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + int const acceleration = (pThis->cLevel < 0) ? 
-pThis->cLevel + 1 : 1; + return LZ4_compress_fast_continue(pThis->LZ4_stream, src, dst, srcSize, dstSize, acceleration); +} + +static int LZ4_compressBlockStreamHC( + const struct compressionParameters* pThis, + const char* src, char* dst, + int srcSize, int dstSize) +{ + return LZ4_compress_HC_continue(pThis->LZ4_streamHC, src, dst, srcSize, dstSize); +} + +static void LZ4_compressCleanupNoStream( + const struct compressionParameters* pThis) +{ + (void)pThis; +} + +static void LZ4_compressCleanupStream( + const struct compressionParameters* pThis) +{ + LZ4_freeStream(pThis->LZ4_stream); + LZ4_freeStream(pThis->LZ4_dictStream); +} + +static void LZ4_compressCleanupStreamHC( + const struct compressionParameters* pThis) +{ + LZ4_freeStreamHC(pThis->LZ4_streamHC); + LZ4_freeStreamHC(pThis->LZ4_dictStreamHC); +} + +static void LZ4_buildCompressionParameters( + struct compressionParameters* pParams, + int cLevel, const char* dictBuf, int dictSize) +{ + pParams->cLevel = cLevel; + pParams->dictBuf = dictBuf; + pParams->dictSize = dictSize; + + if (dictSize) { + if (cLevel < LZ4HC_CLEVEL_MIN) { + pParams->initFunction = LZ4_compressInitStream; + pParams->resetFunction = LZ4_compressResetStream; + pParams->blockFunction = LZ4_compressBlockStream; + pParams->cleanupFunction = LZ4_compressCleanupStream; + } else { + pParams->initFunction = LZ4_compressInitStreamHC; + pParams->resetFunction = LZ4_compressResetStreamHC; + pParams->blockFunction = LZ4_compressBlockStreamHC; + pParams->cleanupFunction = LZ4_compressCleanupStreamHC; + } + } else { + pParams->initFunction = LZ4_compressInitNoStream; + pParams->resetFunction = LZ4_compressResetNoStream; + pParams->cleanupFunction = LZ4_compressCleanupNoStream; + + if (cLevel < LZ4HC_CLEVEL_MIN) { + pParams->blockFunction = LZ4_compressBlockNoStream; + } else { + pParams->blockFunction = LZ4_compressBlockNoStreamHC; + } + } +} + +#define LZ4_isError(errcode) (errcode==0) + + +/* ************************************* +* Constants 
+***************************************/ +#ifndef LZ4_GIT_COMMIT_STRING +# define LZ4_GIT_COMMIT_STRING "" +#else +# define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT) +#endif + +#define NBSECONDS 3 +#define TIMELOOP_MICROSEC 1*1000000ULL /* 1 second */ +#define TIMELOOP_NANOSEC 1*1000000000ULL /* 1 second */ +#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */ +#define COOLPERIOD_SEC 10 +#define DECOMP_MULT 1 /* test decompression DECOMP_MULT times longer than compression */ + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define LZ4_MAX_DICT_SIZE (64 KB) + +static const size_t maxMemory = (sizeof(size_t)==4) ? (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31)); + +static U32 g_compressibilityDefault = 50; + + +/* ************************************* +* console display +***************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static U32 g_displayLevel = 2; /* 0 : no display; 1: errors; 2 : + result + interaction + warnings; 3 : + progression; 4 : + information */ + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \ + { g_time = clock(); DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stdout); } } +static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100; +static clock_t g_time = 0; + + +/* ************************************* +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) 
\ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, "\n"); \ + exit(error); \ +} + + +/* ************************************* +* Benchmark Parameters +***************************************/ +static U32 g_nbSeconds = NBSECONDS; +static size_t g_blockSize = 0; +int g_additionalParam = 0; +int g_benchSeparately = 0; + +void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; } + +void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; } + +void BMK_setNbSeconds(unsigned nbSeconds) +{ + g_nbSeconds = nbSeconds; + DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds); +} + +void BMK_setBlockSize(size_t blockSize) { g_blockSize = blockSize; } + +void BMK_setBenchSeparately(int separate) { g_benchSeparately = (separate!=0); } + + +/* ******************************************************** +* Bench functions +**********************************************************/ +typedef struct { + const char* srcPtr; + size_t srcSize; + char* cPtr; + size_t cRoom; + size_t cSize; + char* resPtr; + size_t resSize; +} blockParam_t; + +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +static int BMK_benchMem(const void* srcBuffer, size_t srcSize, + const char* displayName, int cLevel, + const size_t* fileSizes, U32 nbFiles, + const char* dictBuf, int dictSize) +{ + size_t const blockSize = (g_blockSize>=32 ? 
g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ; + U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles; + blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t)); + size_t const maxCompressedSize = LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024); /* add some room for safety */ + void* const compressedBuffer = malloc(maxCompressedSize); + void* const resultBuffer = malloc(srcSize); + U32 nbBlocks; + struct compressionParameters compP; + + /* checks */ + if (!compressedBuffer || !resultBuffer || !blockTable) + EXM_THROW(31, "allocation error : not enough memory"); + + if (strlen(displayName)>17) displayName += strlen(displayName)-17; /* can only display 17 characters */ + + /* init */ + LZ4_buildCompressionParameters(&compP, cLevel, dictBuf, dictSize); + compP.initFunction(&compP); + + /* Init blockTable data */ + { const char* srcPtr = (const char*)srcBuffer; + char* cPtr = (char*)compressedBuffer; + char* resPtr = (char*)resultBuffer; + U32 fileNb; + for (nbBlocks=0, fileNb=0; fileNb ACTIVEPERIOD_MICROSEC) { + DISPLAYLEVEL(2, "\rcooling down ... 
\r"); + UTIL_sleep(COOLPERIOD_SEC); + coolTime = UTIL_getTime(); + } + + /* Compression */ + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize); + if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize); /* warm up and erase result buffer */ + + UTIL_sleepMilli(1); /* give processor time to other processes */ + UTIL_waitForNextTick(); + + if (!cCompleted) { /* still some time to do compression tests */ + UTIL_time_t const clockStart = UTIL_getTime(); + U32 nbLoops; + for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) { + U32 blockNb; + compP.resetFunction(&compP); + for (blockNb=0; blockNb 0) { + if (clockSpan < fastestC * nbCompressionLoops) + fastestC = clockSpan / nbCompressionLoops; + assert(fastestC > 0); + nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1; /* aim for ~1sec */ + } else { + assert(nbCompressionLoops < 40000000); /* avoid overflow */ + nbCompressionLoops *= 100; + } + totalCTime += clockSpan; + cCompleted = totalCTime>maxTime; + } } + + cSize = 0; + { U32 blockNb; for (blockNb=0; blockNb %10u (%5.3f),%6.1f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio, + ((double)srcSize / fastestC) * 1000 ); + + (void)fastestD; (void)crcOrig; /* unused when decompression disabled */ +#if 1 + /* Decompression */ + if (!dCompleted) memset(resultBuffer, 0xD6, srcSize); /* warm result buffer */ + + UTIL_sleepMilli(5); /* give processor time to other processes */ + UTIL_waitForNextTick(); + + if (!dCompleted) { + UTIL_time_t const clockStart = UTIL_getTime(); + U32 nbLoops; + for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) { + U32 blockNb; + for (blockNb=0; blockNb 0) { + if (clockSpan < fastestD * nbDecodeLoops) + fastestD = clockSpan / nbDecodeLoops; + assert(fastestD > 0); + nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1; /* aim for ~1sec */ + } else { + assert(nbDecodeLoops < 40000000); /* avoid overflow */ + nbDecodeLoops *= 100; + } + totalDTime += clockSpan; + 
dCompleted = totalDTime > (DECOMP_MULT*maxTime); + } } + + markNb = (markNb+1) % NB_MARKS; + DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r", + marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio, + ((double)srcSize / fastestC) * 1000, + ((double)srcSize / fastestD) * 1000); + + /* CRC Checking */ + { U64 const crcCheck = XXH64(resultBuffer, srcSize, 0); + if (crcOrig!=crcCheck) { + size_t u; + DISPLAY("\n!!! WARNING !!! %17s : Invalid Checksum : %x != %x \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck); + for (u=0; u u) break; + bacc += blockTable[segNb].srcSize; + } + pos = (U32)(u - bacc); + bNb = pos / (128 KB); + DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos); + break; + } + if (u==srcSize-1) { /* should never happen */ + DISPLAY("no difference detected\n"); + } } + break; + } } /* CRC Checking */ +#endif + } /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */ + + if (g_displayLevel == 1) { + double const cSpeed = ((double)srcSize / fastestC) * 1000; + double const dSpeed = ((double)srcSize / fastestD) * 1000; + if (g_additionalParam) + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam); + else + DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName); + } + DISPLAYLEVEL(2, "%2i#\n", cLevel); + } /* Bench */ + + /* clean up */ + compP.cleanupFunction(&compP); + free(blockTable); + free(compressedBuffer); + free(resultBuffer); + return 0; +} + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t step = 64 MB; + BYTE* testmem=NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + requiredMem += 2*step; + if (requiredMem > maxMemory) requiredMem = maxMemory; + + while (!testmem) { + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; + testmem = (BYTE*) malloc ((size_t)requiredMem); + } + free (testmem); + + /* 
keep some space available */ + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; + + return (size_t)requiredMem; +} + + +static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize, + const char* displayName, int cLevel, int cLevelLast, + const size_t* fileSizes, unsigned nbFiles, + const char* dictBuf, int dictSize) +{ + int l; + + const char* pch = strrchr(displayName, '\\'); /* Windows */ + if (!pch) pch = strrchr(displayName, '/'); /* Linux */ + if (pch) displayName = pch+1; + + SET_REALTIME_PRIORITY; + + if (g_displayLevel == 1 && !g_additionalParam) + DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10)); + + if (cLevelLast < cLevel) cLevelLast = cLevel; + + for (l=cLevel; l <= cLevelLast; l++) { + BMK_benchMem(srcBuffer, benchedSize, + displayName, l, + fileSizes, nbFiles, + dictBuf, dictSize); + } +} + + +/*! BMK_loadFiles() : + Loads `buffer` with content of files listed within `fileNamesTable`. 
+ At most, fills `buffer` entirely */ +static void BMK_loadFiles(void* buffer, size_t bufferSize, + size_t* fileSizes, + const char** fileNamesTable, unsigned nbFiles) +{ + size_t pos = 0, totalSize = 0; + unsigned n; + for (n=0; n bufferSize-pos) { /* buffer too small - stop after this file */ + fileSize = bufferSize-pos; + nbFiles=n; + } + { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f); + if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]); + pos += readSize; } + fileSizes[n] = (size_t)fileSize; + totalSize += (size_t)fileSize; + fclose(f); + } + + if (totalSize == 0) EXM_THROW(12, "no data to bench"); +} + +static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles, + int cLevel, int cLevelLast, + const char* dictBuf, int dictSize) +{ + void* srcBuffer; + size_t benchedSize; + size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t)); + U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles); + char mfName[20] = {0}; + + if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes"); + + /* Memory allocation & restrictions */ + benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3; + if (benchedSize==0) EXM_THROW(12, "not enough memory"); + if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad; + if (benchedSize > LZ4_MAX_INPUT_SIZE) { + benchedSize = LZ4_MAX_INPUT_SIZE; + DISPLAY("File(s) bigger than LZ4's max input size; testing %u MB only...\n", (U32)(benchedSize >> 20)); + } else { + if (benchedSize < totalSizeToLoad) + DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20)); + } + srcBuffer = malloc(benchedSize + !benchedSize); /* avoid alloc of zero */ + if (!srcBuffer) EXM_THROW(12, "not enough memory"); + + /* Load input buffer */ + BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles); + + /* Bench */ + snprintf (mfName, sizeof(mfName), " %u files", nbFiles); + { const char* 
displayName = (nbFiles > 1) ? mfName : fileNamesTable[0]; + BMK_benchCLevel(srcBuffer, benchedSize, + displayName, cLevel, cLevelLast, + fileSizes, nbFiles, + dictBuf, dictSize); + } + + /* clean up */ + free(srcBuffer); + free(fileSizes); +} + + +static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, + const char* dictBuf, int dictSize) +{ + char name[20] = {0}; + size_t benchedSize = 10000000; + void* const srcBuffer = malloc(benchedSize); + + /* Memory allocation */ + if (!srcBuffer) EXM_THROW(21, "not enough memory"); + + /* Fill input buffer */ + RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0); + + /* Bench */ + snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100)); + BMK_benchCLevel(srcBuffer, benchedSize, name, cLevel, cLevelLast, &benchedSize, 1, dictBuf, dictSize); + + /* clean up */ + free(srcBuffer); +} + + +int BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles, + int cLevel, int cLevelLast, + const char* dictBuf, int dictSize) +{ + unsigned fileNb; + if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX; + if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX; + if (cLevelLast < cLevel) cLevelLast = cLevel; + if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + + for (fileNb=0; fileNb LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX; + if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX; + if (cLevelLast < cLevel) cLevelLast = cLevel; + if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast); + + if (dictFileName) { + FILE* dictFile = NULL; + U64 dictFileSize = UTIL_getFileSize(dictFileName); + if (!dictFileSize) EXM_THROW(25, "Dictionary error : could not stat dictionary file"); + + dictFile = fopen(dictFileName, "rb"); + if (!dictFile) EXM_THROW(25, "Dictionary error : could not open dictionary file"); + + if (dictFileSize > LZ4_MAX_DICT_SIZE) { + 
dictSize = LZ4_MAX_DICT_SIZE; + if (UTIL_fseek(dictFile, dictFileSize - dictSize, SEEK_SET)) + EXM_THROW(25, "Dictionary error : could not seek dictionary file"); + } else { + dictSize = (int)dictFileSize; + } + + dictBuf = (char *)malloc(dictSize); + if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory"); + + if (fread(dictBuf, 1, dictSize, dictFile) != (size_t)dictSize) + EXM_THROW(25, "Dictionary error : could not read dictionary file"); + + fclose(dictFile); + } + + if (nbFiles == 0) + BMK_syntheticTest(cLevel, cLevelLast, compressibility, dictBuf, dictSize); + else { + if (g_benchSeparately) + BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize); + else + BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize); + } + + free(dictBuf); + return 0; +} diff --git a/programs/bench.h b/programs/bench.h new file mode 100644 index 0000000..22ebf60 --- /dev/null +++ b/programs/bench.h @@ -0,0 +1,39 @@ +/* + bench.h - Demo program to benchmark open-source compression algorithm + Copyright (C) Yann Collet 2012-2016 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +#ifndef BENCH_H_125623623633 +#define BENCH_H_125623623633 + +#include + +int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles, + int cLevel, int cLevelLast, + const char* dictFileName); + +/* Set Parameters */ +void BMK_setNbSeconds(unsigned nbLoops); +void BMK_setBlockSize(size_t blockSize); +void BMK_setAdditionalParam(int additionalParam); +void BMK_setNotificationLevel(unsigned level); +void BMK_setBenchSeparately(int separate); + +#endif /* BENCH_H_125623623633 */ diff --git a/programs/datagen.c b/programs/datagen.c new file mode 100644 index 0000000..24a2da2 --- /dev/null +++ b/programs/datagen.c @@ -0,0 +1,189 @@ +/* + datagen.c - compressible data generator test tool + Copyright (C) Yann Collet 2012-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** +* Includes +**************************************/ +#include "platform.h" /* Compiler options, SET_BINARY_MODE */ +#include "util.h" /* U32 */ +#include /* malloc */ +#include /* FILE, fwrite */ +#include /* memcpy */ +#include + + +/************************************** +* Constants +**************************************/ +#define KB *(1 <<10) + +#define PRIME1 2654435761U +#define PRIME2 2246822519U + + +/************************************** +* Local types +**************************************/ +#define LTLOG 13 +#define LTSIZE (1< > (32 - r))) +static unsigned int RDG_rand(U32* src) +{ + U32 rand32 = *src; + rand32 *= PRIME1; + rand32 ^= PRIME2; + rand32 = RDG_rotl32(rand32, 13); + *src = rand32; + return rand32; +} + + +static void RDG_fillLiteralDistrib(litDistribTable lt, double ld) +{ + BYTE const firstChar = ld <= 0.0 ? 0 : '('; + BYTE const lastChar = ld <= 0.0 ? 255 : '}'; + BYTE character = ld <= 0.0 ? 0 : '0'; + U32 u = 0; + + while (u lastChar) character = firstChar; + } +} + + +static BYTE RDG_genChar(U32* seed, const litDistribTable lt) +{ + U32 id = RDG_rand(seed) & LTMASK; + return (lt[id]); +} + + +#define RDG_DICTSIZE (32 KB) +#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767) +#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? 
(RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15) +void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, litDistribTable lt, unsigned* seedPtr) +{ + BYTE* buffPtr = (BYTE*)buffer; + const U32 matchProba32 = (U32)(32768 * matchProba); + size_t pos = prefixSize; + U32* seed = seedPtr; + + /* special case */ + while (matchProba >= 1.0) { + size_t size0 = RDG_rand(seed) & 3; + size0 = (size_t)1 << (16 + size0 * 2); + size0 += RDG_rand(seed) & (size0-1); /* because size0 is power of 2*/ + if (buffSize < pos + size0) { + memset(buffPtr+pos, 0, buffSize-pos); + return; + } + memset(buffPtr+pos, 0, size0); + pos += size0; + buffPtr[pos-1] = RDG_genChar(seed, lt); + } + + /* init */ + if (pos==0) { + buffPtr[0] = RDG_genChar(seed, lt); + pos=1; + } + + /* Generate compressible data */ + while (pos < buffSize) { + /* Select : Literal (char) or Match (within 32K) */ + if (RDG_RAND15BITS < matchProba32) { + /* Copy (within 32K) */ + size_t match; + size_t d; + int length = RDG_RANDLENGTH + 4; + U32 offset = RDG_RAND15BITS + 1; + if (offset > pos) offset = (U32)pos; + match = pos - offset; + d = pos + length; + if (d > buffSize) d = buffSize; + while (pos < d) buffPtr[pos++] = buffPtr[match++]; + } else { + /* Literal (noise) */ + size_t d; + size_t length = RDG_RANDLENGTH; + d = pos + length; + if (d > buffSize) d = buffSize; + while (pos < d) buffPtr[pos++] = RDG_genChar(seed, lt); + } + } +} + + +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed) +{ + litDistribTable lt; + if (litProba==0.0) litProba = matchProba / 4.5; + RDG_fillLiteralDistrib(lt, litProba); + RDG_genBlock(buffer, size, 0, matchProba, lt, &seed); +} + + +#define RDG_BLOCKSIZE (128 KB) +void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed) +{ + BYTE buff[RDG_DICTSIZE + RDG_BLOCKSIZE]; + U64 total = 0; + size_t genBlockSize = RDG_BLOCKSIZE; + litDistribTable lt; + + /* init */ + if 
(litProba==0.0) litProba = matchProba / 4.5; + RDG_fillLiteralDistrib(lt, litProba); + SET_BINARY_MODE(stdout); + + /* Generate dict */ + RDG_genBlock(buff, RDG_DICTSIZE, 0, matchProba, lt, &seed); + + /* Generate compressible data */ + while (total < size) { + RDG_genBlock(buff, RDG_DICTSIZE+RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, lt, &seed); + if (size-total < RDG_BLOCKSIZE) genBlockSize = (size_t)(size-total); + total += genBlockSize; + fwrite(buff, 1, genBlockSize, stdout); /* should check potential write error */ + /* update dict */ + memcpy(buff, buff + RDG_BLOCKSIZE, RDG_DICTSIZE); + } +} diff --git a/programs/datagen.h b/programs/datagen.h new file mode 100644 index 0000000..91c5b02 --- /dev/null +++ b/programs/datagen.h @@ -0,0 +1,40 @@ +/* + datagen.h - compressible data generator header + Copyright (C) Yann Collet 2012-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + + +#include /* size_t */ + +void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed); +void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed); +/* RDG_genOut + Generates 'size' bytes of compressible data into stdout. + Compressibility can be controlled using 'matchProba'. + 'litProba' is optional, and affects the variability of bytes. If litProba==0.0, a default value is used. + Generated data can be selected using 'seed'. + If (matchProba, litProba and seed) are equal, the function always generates the same content. + + RDG_genBuffer + Same as RDG_genOut, but generates data into the provided buffer +*/ diff --git a/programs/lz4-exe.rc.in b/programs/lz4-exe.rc.in new file mode 100644 index 0000000..7b81030 --- /dev/null +++ b/programs/lz4-exe.rc.in @@ -0,0 +1,27 @@ +1 VERSIONINFO +FILEVERSION @LIBVER_MAJOR@,@LIBVER_MINOR@,@LIBVER_PATCH@,0 +PRODUCTVERSION @LIBVER_MAJOR@,@LIBVER_MINOR@,@LIBVER_PATCH@,0 +FILEFLAGSMASK 0 +FILEOS 0x40000 +FILETYPE 1 +{ + BLOCK "StringFileInfo" + { + BLOCK "040904B0" + { + VALUE "CompanyName", "Yann Collet" + VALUE "FileDescription", "Extremely fast compression" + VALUE "FileVersion", "@LIBVER_MAJOR@.@LIBVER_MINOR@.@LIBVER_PATCH@.0" + VALUE "InternalName", "@PROGNAME@" + VALUE "LegalCopyright", "Copyright (C) 2013-2016, Yann Collet" + VALUE "OriginalFilename", "@PROGNAME@.@EXT@" + VALUE "ProductName", "LZ4" + VALUE "ProductVersion", "@LIBVER_MAJOR@.@LIBVER_MINOR@.@LIBVER_PATCH@.0" + } + } + BLOCK "VarFileInfo" + { + VALUE "Translation", 0x0409, 1200 + } +} + diff --git a/programs/lz4.1 b/programs/lz4.1 new file mode 100644 index 0000000..d758ed5 --- /dev/null +++ b/programs/lz4.1 @@ -0,0 +1,241 @@ +. +.TH "LZ4" "1" "July 2019" "lz4 1.9.2" "User Commands" +. 
+.SH "NAME" +\fBlz4\fR \- lz4, unlz4, lz4cat \- Compress or decompress \.lz4 files +. +.SH "SYNOPSIS" +\fBlz4\fR [\fIOPTIONS\fR] [\-|INPUT\-FILE] \fIOUTPUT\-FILE\fR +. +.P +\fBunlz4\fR is equivalent to \fBlz4 \-d\fR +. +.P +\fBlz4cat\fR is equivalent to \fBlz4 \-dcfm\fR +. +.P +When writing scripts that need to decompress files, it is recommended to always use the name \fBlz4\fR with appropriate arguments (\fBlz4 \-d\fR or \fBlz4 \-dc\fR) instead of the names \fBunlz4\fR and \fBlz4cat\fR\. +. +.SH "DESCRIPTION" +\fBlz4\fR is an extremely fast lossless compression algorithm, based on \fBbyte\-aligned LZ77\fR family of compression scheme\. \fBlz4\fR offers compression speeds of 400 MB/s per core, linearly scalable with multi\-core CPUs\. It features an extremely fast decoder, with speed in multiple GB/s per core, typically reaching RAM speed limit on multi\-core systems\. The native file format is the \fB\.lz4\fR format\. +. +.SS "Difference between lz4 and gzip" +\fBlz4\fR supports a command line syntax similar \fIbut not identical\fR to \fBgzip(1)\fR\. Differences are : +. +.IP "\(bu" 4 +\fBlz4\fR compresses a single file by default (see \fB\-m\fR for multiple files) +. +.IP "\(bu" 4 +\fBlz4 file1 file2\fR means : compress file1 \fIinto\fR file2 +. +.IP "\(bu" 4 +\fBlz4 file\.lz4\fR will default to decompression (use \fB\-z\fR to force compression) +. +.IP "\(bu" 4 +\fBlz4\fR preserves original files +. +.IP "\(bu" 4 +\fBlz4\fR shows real\-time notification statistics during compression or decompression of a single file (use \fB\-q\fR to silence them) +. +.IP "\(bu" 4 +When no destination is specified, result is sent on implicit output, which depends on \fBstdout\fR status\. When \fBstdout\fR \fIis Not the console\fR, it becomes the implicit output\. Otherwise, if \fBstdout\fR is the console, the implicit output is \fBfilename\.lz4\fR\. +. +.IP "\(bu" 4 +It is considered bad practice to rely on implicit output in scripts\. 
because the script\'s environment may change\. Always use explicit output in scripts\. \fB\-c\fR ensures that output will be \fBstdout\fR\. Conversely, providing a destination name, or using \fB\-m\fR ensures that the output will be either the specified name, or \fBfilename\.lz4\fR respectively\. +. +.IP "" 0 +. +.P +Default behaviors can be modified by opt\-in commands, detailed below\. +. +.IP "\(bu" 4 +\fBlz4 \-m\fR makes it possible to provide multiple input filenames, which will be compressed into files using suffix \fB\.lz4\fR\. Progress notifications become disabled by default (use \fB\-v\fR to enable them)\. This mode has a behavior which more closely mimics \fBgzip\fR command line, with the main remaining difference being that source files are preserved by default\. +. +.IP "\(bu" 4 +Similarly, \fBlz4 \-m \-d\fR can decompress multiple \fB*\.lz4\fR files\. +. +.IP "\(bu" 4 +It\'s possible to opt\-in to erase source files on successful compression or decompression, using \fB\-\-rm\fR command\. +. +.IP "\(bu" 4 +Consequently, \fBlz4 \-m \-\-rm\fR behaves the same as \fBgzip\fR\. +. +.IP "" 0 +. +.SS "Concatenation of \.lz4 files" +It is possible to concatenate \fB\.lz4\fR files as is\. \fBlz4\fR will decompress such files as if they were a single \fB\.lz4\fR file\. For example: +. +.IP "" 4 +. +.nf + +lz4 file1 > foo\.lz4 +lz4 file2 >> foo\.lz4 +. +.fi +. +.IP "" 0 +. +.P +Then \fBlz4cat foo\.lz4\fR is equivalent to \fBcat file1 file2\fR\. +. +.SH "OPTIONS" +. +.SS "Short commands concatenation" +In some cases, some options can be expressed using short command \fB\-x\fR or long command \fB\-\-long\-word\fR\. Short commands can be concatenated together\. For example, \fB\-d \-c\fR is equivalent to \fB\-dc\fR\. Long commands cannot be concatenated\. They must be clearly separated by a space\. +. +.SS "Multiple commands" +When multiple contradictory commands are issued on a same command line, only the latest one will be applied\. +. +.SS "Operation mode" +. 
+.TP +\fB\-z\fR \fB\-\-compress\fR +Compress\. This is the default operation mode when no operation mode option is specified, no other operation mode is implied from the command name (for example, \fBunlz4\fR implies \fB\-\-decompress\fR), nor from the input file name (for example, a file extension \fB\.lz4\fR implies \fB\-\-decompress\fR by default)\. \fB\-z\fR can also be used to force compression of an already compressed \fB\.lz4\fR file\. +. +.TP +\fB\-d\fR \fB\-\-decompress\fR \fB\-\-uncompress\fR +Decompress\. \fB\-\-decompress\fR is also the default operation when the input filename has an \fB\.lz4\fR extension\. +. +.TP +\fB\-t\fR \fB\-\-test\fR +Test the integrity of compressed \fB\.lz4\fR files\. The decompressed data is discarded\. No files are created nor removed\. +. +.TP +\fB\-b#\fR +Benchmark mode, using \fB#\fR compression level\. +. +.TP +\fB\-\-list\fR +List information about \.lz4 files\. note : current implementation is limited to single\-frame \.lz4 files\. +. +.SS "Operation modifiers" +. +.TP +\fB\-#\fR +Compression level, with # being any value from 1 to 12\. Higher values trade compression speed for compression ratio\. Values above 12 are considered the same as 12\. Recommended values are 1 for fast compression (default), and 9 for high compression\. Speed/compression trade\-off will vary depending on data to compress\. Decompression speed remains fast at all settings\. +. +.TP +\fB\-\-fast[=#]\fR +Switch to ultra\-fast compression levels\. The higher the value, the faster the compression speed, at the cost of some compression ratio\. If \fB=#\fR is not present, it defaults to \fB1\fR\. This setting overrides compression level if one was set previously\. Similarly, if a compression level is set after \fB\-\-fast\fR, it overrides it\. +. +.TP +\fB\-\-best\fR +Set highest compression level\. Same as -12\. +. +.TP +\fB\-\-favor\-decSpeed\fR +Generate compressed data optimized for decompression speed\. 
Compressed data will be larger as a consequence (typically by ~0\.5%), while decompression speed will be improved by 5\-20%, depending on use cases\. This option only works in combination with very high compression levels (>=10)\. +. +.TP +\fB\-D dictionaryName\fR +Compress, decompress or benchmark using dictionary \fIdictionaryName\fR\. Compression and decompression must use the same dictionary to be compatible\. Using a different dictionary during decompression will either abort due to decompression error, or generate a checksum error\. +. +.TP +\fB\-f\fR \fB\-\-[no\-]force\fR +This option has several effects: +. +.IP +If the target file already exists, overwrite it without prompting\. +. +.IP +When used with \fB\-\-decompress\fR and \fBlz4\fR cannot recognize the type of the source file, copy the source file as is to standard output\. This allows \fBlz4cat \-\-force\fR to be used like \fBcat (1)\fR for files that have not been compressed with \fBlz4\fR\. +. +.TP +\fB\-c\fR \fB\-\-stdout\fR \fB\-\-to\-stdout\fR +Force write to standard output, even if it is the console\. +. +.TP +\fB\-m\fR \fB\-\-multiple\fR +Multiple input files\. Compressed file names will be appended a \fB\.lz4\fR suffix\. This mode also reduces notification level\. Can also be used to list multiple files\. \fBlz4 \-m\fR has a behavior equivalent to \fBgzip \-k\fR (it preserves source files by default)\. +. +.TP +\fB\-r\fR +operate recursively on directories\. This mode also sets \fB\-m\fR (multiple input files)\. +. +.TP +\fB\-B#\fR +Block size [4\-7](default : 7) +. +.br +\fB\-B4\fR= 64KB ; \fB\-B5\fR= 256KB ; \fB\-B6\fR= 1MB ; \fB\-B7\fR= 4MB +. +.TP +\fB\-BI\fR +Produce independent blocks (default) +. +.TP +\fB\-BD\fR +Blocks depend on predecessors (improves compression ratio, more noticeable on small blocks) +. +.TP +\fB\-\-[no\-]frame\-crc\fR +Select frame checksum (default:enabled) +. +.TP +\fB\-\-[no\-]content\-size\fR +Header includes original size (default:not present) +. 
+.br +Note : this option can only be activated when the original size can be determined, hence for a file\. It won\'t work with unknown source size, such as stdin or pipe\. +. +.TP +\fB\-\-[no\-]sparse\fR +Sparse mode support (default:enabled on file, disabled on stdout) +. +.TP +\fB\-l\fR +Use Legacy format (typically for Linux Kernel compression) +. +.br +Note : \fB\-l\fR is not compatible with \fB\-m\fR (\fB\-\-multiple\fR) nor \fB\-r\fR +. +.SS "Other options" +. +.TP +\fB\-v\fR \fB\-\-verbose\fR +Verbose mode +. +.TP +\fB\-q\fR \fB\-\-quiet\fR +Suppress warnings and real\-time statistics; specify twice to suppress errors too +. +.TP +\fB\-h\fR \fB\-H\fR \fB\-\-help\fR +Display help/long help and exit +. +.TP +\fB\-V\fR \fB\-\-version\fR +Display Version number and exit +. +.TP +\fB\-k\fR \fB\-\-keep\fR +Preserve source files (default behavior) +. +.TP +\fB\-\-rm\fR +Delete source files on successful compression or decompression +. +.TP +\fB\-\-\fR +Treat all subsequent arguments as files +. +.SS "Benchmark mode" +. +.TP +\fB\-b#\fR +Benchmark file(s), using # compression level +. +.TP +\fB\-e#\fR +Benchmark multiple compression levels, from b# to e# (included) +. +.TP +\fB\-i#\fR +Minimum evaluation time in seconds [1\-9] (default : 3) +. +.SH "BUGS" +Report bugs at: https://github\.com/lz4/lz4/issues +. +.SH "AUTHOR" +Yann Collet diff --git a/programs/lz4.1.md b/programs/lz4.1.md new file mode 100644 index 0000000..56c0053 --- /dev/null +++ b/programs/lz4.1.md @@ -0,0 +1,250 @@ +lz4(1) -- lz4, unlz4, lz4cat - Compress or decompress .lz4 files +================================================================ + +SYNOPSIS +-------- + +`lz4` [*OPTIONS*] [-|INPUT-FILE] <OUTPUT-FILE> + +`unlz4` is equivalent to `lz4 -d` + +`lz4cat` is equivalent to `lz4 -dcfm` + +When writing scripts that need to decompress files, +it is recommended to always use the name `lz4` with appropriate arguments +(`lz4 -d` or `lz4 -dc`) instead of the names `unlz4` and `lz4cat`.
+ + +DESCRIPTION +----------- + +`lz4` is an extremely fast lossless compression algorithm, +based on the **byte-aligned LZ77** family of compression schemes. +`lz4` offers compression speeds of 400 MB/s per core, linearly scalable with +multi-core CPUs. +It features an extremely fast decoder, with speed in multiple GB/s per core, +typically reaching RAM speed limit on multi-core systems. +The native file format is the `.lz4` format. + +### Difference between lz4 and gzip + +`lz4` supports a command line syntax similar _but not identical_ to `gzip(1)`. +Differences are : + + * `lz4` compresses a single file by default (see `-m` for multiple files) + * `lz4 file1 file2` means : compress file1 _into_ file2 + * `lz4 file.lz4` will default to decompression (use `-z` to force compression) + * `lz4` preserves original files + * `lz4` shows real-time notification statistics + during compression or decompression of a single file + (use `-q` to silence them) + * When no destination is specified, result is sent on implicit output, + which depends on `stdout` status. + When `stdout` _is Not the console_, it becomes the implicit output. + Otherwise, if `stdout` is the console, the implicit output is `filename.lz4`. + * It is considered bad practice to rely on implicit output in scripts, + because the script's environment may change. + Always use explicit output in scripts. + `-c` ensures that output will be `stdout`. + Conversely, providing a destination name, or using `-m` + ensures that the output will be either the specified name, or `filename.lz4` respectively. + +Default behaviors can be modified by opt-in commands, detailed below. + + * `lz4 -m` makes it possible to provide multiple input filenames, + which will be compressed into files using suffix `.lz4`. + Progress notifications become disabled by default (use `-v` to enable them).
+ This mode has a behavior which more closely mimics `gzip` command line, + with the main remaining difference being that source files are preserved by default. + * Similarly, `lz4 -m -d` can decompress multiple `*.lz4` files. + * It's possible to opt-in to erase source files + on successful compression or decompression, using `--rm` command. + * Consequently, `lz4 -m --rm` behaves the same as `gzip`. + +### Concatenation of .lz4 files + +It is possible to concatenate `.lz4` files as is. +`lz4` will decompress such files as if they were a single `.lz4` file. +For example: + + lz4 file1 > foo.lz4 + lz4 file2 >> foo.lz4 + +Then `lz4cat foo.lz4` is equivalent to `cat file1 file2`. + +OPTIONS +------- + +### Short commands concatenation + +In some cases, some options can be expressed using short command `-x` +or long command `--long-word`. +Short commands can be concatenated together. +For example, `-d -c` is equivalent to `-dc`. +Long commands cannot be concatenated. They must be clearly separated by a space. + +### Multiple commands + +When multiple contradictory commands are issued on the same command line, +only the last one will be applied. + +### Operation mode + +* `-z` `--compress`: + Compress. + This is the default operation mode when no operation mode option is + specified, no other operation mode is implied from the command name + (for example, `unlz4` implies `--decompress`), + nor from the input file name + (for example, a file extension `.lz4` implies `--decompress` by default). + `-z` can also be used to force compression of an already compressed + `.lz4` file. + +* `-d` `--decompress` `--uncompress`: + Decompress. + `--decompress` is also the default operation when the input filename has a + `.lz4` extension. + +* `-t` `--test`: + Test the integrity of compressed `.lz4` files. + The decompressed data is discarded. + No files are created nor removed. + +* `-b#`: + Benchmark mode, using `#` compression level.
+ +* `--list`: + List information about .lz4 files. + note : current implementation is limited to single-frame .lz4 files. + +### Operation modifiers + +* `-#`: + Compression level, with # being any value from 1 to 12. + Higher values trade compression speed for compression ratio. + Values above 12 are considered the same as 12. + Recommended values are 1 for fast compression (default), + and 9 for high compression. + Speed/compression trade-off will vary depending on data to compress. + Decompression speed remains fast at all settings. + +* `--fast[=#]`: + Switch to ultra-fast compression levels. + The higher the value, the faster the compression speed, at the cost of some compression ratio. + If `=#` is not present, it defaults to `1`. + This setting overrides compression level if one was set previously. + Similarly, if a compression level is set after `--fast`, it overrides it. + +* `--best`: + Set highest compression level. Same as -12. + +* `--favor-decSpeed`: + Generate compressed data optimized for decompression speed. + Compressed data will be larger as a consequence (typically by ~0.5%), + while decompression speed will be improved by 5-20%, depending on use cases. + This option only works in combination with very high compression levels (>=10). + +* `-D dictionaryName`: + Compress, decompress or benchmark using dictionary _dictionaryName_. + Compression and decompression must use the same dictionary to be compatible. + Using a different dictionary during decompression will either + abort due to decompression error, or generate a checksum error. + +* `-f` `--[no-]force`: + This option has several effects: + + If the target file already exists, overwrite it without prompting. + + When used with `--decompress` and `lz4` cannot recognize the type of + the source file, copy the source file as is to standard output. + This allows `lz4cat --force` to be used like `cat (1)` for files + that have not been compressed with `lz4`. 
+ +* `-c` `--stdout` `--to-stdout`: + Force write to standard output, even if it is the console. + +* `-m` `--multiple`: + Multiple input files. + A `.lz4` suffix will be appended to compressed file names. + This mode also reduces notification level. + Can also be used to list multiple files. + `lz4 -m` has a behavior equivalent to `gzip -k` + (it preserves source files by default). + +* `-r` : + Operate recursively on directories. + This mode also sets `-m` (multiple input files). + +* `-B#`: + Block size \[4-7\](default : 7)
+ `-B4`= 64KB ; `-B5`= 256KB ; `-B6`= 1MB ; `-B7`= 4MB + +* `-BI`: + Produce independent blocks (default) + +* `-BD`: + Blocks depend on predecessors (improves compression ratio, more noticeable on small blocks) + +* `--[no-]frame-crc`: + Select frame checksum (default:enabled) + +* `--[no-]content-size`: + Header includes original size (default:not present)
+ Note : this option can only be activated when the original size can be + determined, hence for a file. It won't work with unknown source size, + such as stdin or pipe. + +* `--[no-]sparse`: + Sparse mode support (default:enabled on file, disabled on stdout) + +* `-l`: + Use Legacy format (typically for Linux Kernel compression)
+ Note : `-l` is not compatible with `-m` (`--multiple`) nor `-r` + +### Other options + +* `-v` `--verbose`: + Verbose mode + +* `-q` `--quiet`: + Suppress warnings and real-time statistics; + specify twice to suppress errors too + +* `-h` `-H` `--help`: + Display help/long help and exit + +* `-V` `--version`: + Display Version number and exit + +* `-k` `--keep`: + Preserve source files (default behavior) + +* `--rm` : + Delete source files on successful compression or decompression + +* `--` : + Treat all subsequent arguments as files + + +### Benchmark mode + +* `-b#`: + Benchmark file(s), using # compression level + +* `-e#`: + Benchmark multiple compression levels, from b# to e# (included) + +* `-i#`: + Minimum evaluation time in seconds \[1-9\] (default : 3) + + +BUGS +---- + +Report bugs at: https://github.com/lz4/lz4/issues + + +AUTHOR +------ + +Yann Collet diff --git a/programs/lz4cli.c b/programs/lz4cli.c new file mode 100644 index 0000000..523b8a8 --- /dev/null +++ b/programs/lz4cli.c @@ -0,0 +1,788 @@ +/* + LZ4cli - LZ4 Command Line Interface + Copyright (C) Yann Collet 2011-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* + Note : this is a stand-alone program. + It is not part of LZ4 compression library, it is a user program of the LZ4 library. + The license of LZ4 library is BSD. + The license of xxHash library is BSD. + The license of this compression CLI program is GPLv2. +*/ + + +/**************************** +* Includes +*****************************/ +#include "platform.h" /* Compiler options, IS_CONSOLE */ +#include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */ +#include <stdio.h> /* fprintf, getchar */ +#include <stdlib.h> /* exit, calloc, free */ +#include <string.h> /* strcmp, strlen */ +#include "bench.h" /* BMK_benchFile, BMK_SetNbIterations, BMK_SetBlocksize, BMK_SetPause */ +#include "lz4io.h" /* LZ4IO_compressFilename, LZ4IO_decompressFilename, LZ4IO_compressMultipleFilenames */ +#include "lz4hc.h" /* LZ4HC_CLEVEL_MAX */ +#include "lz4.h" /* LZ4_VERSION_STRING */ + + +/***************************** +* Constants +******************************/ +#define COMPRESSOR_NAME "LZ4 command line interface" +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s %i-bits v%s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(void*)*8), LZ4_versionString(), AUTHOR /* expands to a format string plus its arguments : only usable as the full argument list of a printf-like macro */ +#define LZ4_EXTENSION ".lz4" +#define LZ4CAT "lz4cat" +#define UNLZ4 "unlz4" +#define LZ4_LEGACY "lz4c" +static int g_lz4c_legacy_commands = 0; /* set to 1 by main() when the executable name matches LZ4_LEGACY */ +/* size suffixes, written after a number : `8 MB` expands to `8 *(1U<<20)` */ +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +#define LZ4_BLOCKSIZEID_DEFAULT 7 + + +/*-************************************ +* Macros +***************************************/ +#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned displayLevel = 2; /* 0 : no display ; 1: errors only ; 2 : downgradable normal ; 3 : non-downgradable normal; 4 : + information */ + + +/*-************************************ +* Exceptions +***************************************/ +#define DEBUG 0 +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, "\n"); \ + exit(error); \ +} + + +/*-************************************ +* Version modifiers +***************************************/ +#define DEFAULT_COMPRESSOR LZ4IO_compressFilename +#define DEFAULT_DECOMPRESSOR LZ4IO_decompressFilename +int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, int compressionlevel, const LZ4IO_prefs_t* prefs); /* hidden function */ +int LZ4IO_compressMultipleFilenames_Legacy( + const char** inFileNamesTable, int ifntSize, + const char* suffix, + int compressionLevel, const LZ4IO_prefs_t* prefs); + +/*-*************************** +* Functions +*****************************/ +static int usage(const char* exeName) +{ + DISPLAY( "Usage : \n"); + DISPLAY( " %s [arg] [input] [output] \n", exeName); + DISPLAY( "\n"); + DISPLAY( "input : a filename \n"); + DISPLAY( " with no FILE, or when FILE is - or %s, read standard input\n", stdinmark); + DISPLAY( "Arguments : \n"); + DISPLAY( " -1 : Fast compression (default) \n"); + DISPLAY( " -9 : High compression \n"); + DISPLAY( " -d : decompression (default for %s extension)\n", LZ4_EXTENSION); + DISPLAY( " -z : force compression \n"); + DISPLAY( " -D FILE: use FILE as dictionary \n"); + DISPLAY( " -f : overwrite output without prompting \n"); + DISPLAY( " -k : preserve source files(s) (default) \n"); + DISPLAY( "--rm : remove source file(s) after successful de/compression \n"); + DISPLAY( 
" -h/-H : display help/long help and exit \n"); + return 0; +} + +static int usage_advanced(const char* exeName) +{ + DISPLAY(WELCOME_MESSAGE); + usage(exeName); + DISPLAY( "\n"); + DISPLAY( "Advanced arguments :\n"); + DISPLAY( " -V : display Version number and exit \n"); + DISPLAY( " -v : verbose mode \n"); + DISPLAY( " -q : suppress warnings; specify twice to suppress errors too\n"); + DISPLAY( " -c : force write to standard output, even if it is the console\n"); + DISPLAY( " -t : test compressed file integrity\n"); + DISPLAY( " -m : multiple input files (implies automatic output filenames)\n"); +#ifdef UTIL_HAS_CREATEFILELIST + DISPLAY( " -r : operate recursively on directories (sets also -m) \n"); +#endif + DISPLAY( " -l : compress using Legacy format (Linux kernel compression)\n"); + DISPLAY( " -B# : cut file into blocks of size # bytes [32+] \n"); + DISPLAY( " or predefined block size [4-7] (default: 7) \n"); + DISPLAY( " -BI : Block Independence (default) \n"); + DISPLAY( " -BD : Block dependency (improves compression ratio) \n"); + DISPLAY( " -BX : enable block checksum (default:disabled) \n"); + DISPLAY( "--no-frame-crc : disable stream checksum (default:enabled) \n"); + DISPLAY( "--content-size : compressed frame includes original size (default:not present)\n"); + DISPLAY( "--list FILE : lists information about .lz4 files (useful for files compressed with --content-size flag)\n"); + DISPLAY( "--[no-]sparse : sparse mode (default:enabled on file, disabled on stdout)\n"); + DISPLAY( "--favor-decSpeed: compressed files decompress faster, but are less compressed \n"); + DISPLAY( "--fast[=#]: switch to ultra fast compression level (default: %i)\n", 1); + DISPLAY( "--best : same as -%d\n", LZ4HC_CLEVEL_MAX); + DISPLAY( "Benchmark arguments : \n"); + DISPLAY( " -b# : benchmark file(s), using # compression level (default : 1) \n"); + DISPLAY( " -e# : test all compression levels from -bX to # (default : 1)\n"); + DISPLAY( " -i# : minimum evaluation time in 
seconds (default : 3s) \n"); + if (g_lz4c_legacy_commands) { + DISPLAY( "Legacy arguments : \n"); + DISPLAY( " -c0 : fast compression \n"); + DISPLAY( " -c1 : high compression \n"); + DISPLAY( " -c2,-hc: very high compression \n"); + DISPLAY( " -y : overwrite output without prompting \n"); + } + return 0; +} + +static int usage_longhelp(const char* exeName) +{ + usage_advanced(exeName); + DISPLAY( "\n"); + DISPLAY( "****************************\n"); + DISPLAY( "***** Advanced comment *****\n"); + DISPLAY( "****************************\n"); + DISPLAY( "\n"); + DISPLAY( "Which values can [output] have ? \n"); + DISPLAY( "---------------------------------\n"); + DISPLAY( "[output] : a filename \n"); + DISPLAY( " '%s', or '-' for standard output (pipe mode)\n", stdoutmark); + DISPLAY( " '%s' to discard output (test mode) \n", NULL_OUTPUT); + DISPLAY( "[output] can be left empty. In this case, it receives the following value :\n"); + DISPLAY( " - if stdout is not the console, then [output] = stdout \n"); + DISPLAY( " - if stdout is console : \n"); + DISPLAY( " + for compression, output to filename%s \n", LZ4_EXTENSION); + DISPLAY( " + for decompression, output to filename without '%s'\n", LZ4_EXTENSION); + DISPLAY( " > if input filename has no '%s' extension : error \n", LZ4_EXTENSION); + DISPLAY( "\n"); + DISPLAY( "Compression levels : \n"); + DISPLAY( "---------------------\n"); + DISPLAY( "-0 ... -2 => Fast compression, all identicals\n"); + DISPLAY( "-3 ... 
-%d => High compression; higher number == more compression but slower\n", LZ4HC_CLEVEL_MAX); + DISPLAY( "\n"); + DISPLAY( "stdin, stdout and the console : \n"); + DISPLAY( "--------------------------------\n"); + DISPLAY( "To protect the console from binary flooding (bad argument mistake)\n"); + DISPLAY( "%s will refuse to read from console, or write to console \n", exeName); + DISPLAY( "except if '-c' command is specified, to force output to console \n"); + DISPLAY( "\n"); + DISPLAY( "Simple example :\n"); + DISPLAY( "----------------\n"); + DISPLAY( "1 : compress 'filename' fast, using default output name 'filename.lz4'\n"); + DISPLAY( " %s filename\n", exeName); + DISPLAY( "\n"); + DISPLAY( "Short arguments can be aggregated. For example :\n"); + DISPLAY( "----------------------------------\n"); + DISPLAY( "2 : compress 'filename' in high compression mode, overwrite output if exists\n"); + DISPLAY( " %s -9 -f filename \n", exeName); + DISPLAY( " is equivalent to :\n"); + DISPLAY( " %s -9f filename \n", exeName); + DISPLAY( "\n"); + DISPLAY( "%s can be used in 'pure pipe mode'. For example :\n", exeName); + DISPLAY( "-------------------------------------\n"); + DISPLAY( "3 : compress data stream from 'generator', send result to 'consumer'\n"); + DISPLAY( " generator | %s | consumer \n", exeName); + if (g_lz4c_legacy_commands) { + DISPLAY( "\n"); + DISPLAY( "***** Warning ***** \n"); + DISPLAY( "Legacy arguments take precedence. 
Therefore : \n"); + DISPLAY( "--------------------------------- \n"); + DISPLAY( " %s -hc filename \n", exeName); + DISPLAY( "means 'compress filename in high compression mode' \n"); + DISPLAY( "It is not equivalent to : \n"); + DISPLAY( " %s -h -c filename \n", exeName); + DISPLAY( "which displays help text and exits \n"); + } + return 0; +} +/* badusage() : report incorrect parameters, print the short help unless displayLevel is 0, then terminate the process with exit code 1 (does not return) */ +static int badusage(const char* exeName) +{ + DISPLAYLEVEL(1, "Incorrect parameters\n"); + if (displayLevel >= 1) usage(exeName); + exit(1); +} + +/* waitEnter() : print a prompt on stderr, then block until one character can be read from stdin */ +static void waitEnter(void) +{ + DISPLAY("Press enter to continue...\n"); + (void)getchar(); +} +/* lastNameFromPath() : @return : pointer, inside `path`, to what follows the last '/' ; then, within that remainder, to what follows the last '\\' ; `path` itself when it contains neither separator */ +static const char* lastNameFromPath(const char* path) +{ + const char* name = path; + if (strrchr(name, '/')) name = strrchr(name, '/') + 1; + if (strrchr(name, '\\')) name = strrchr(name, '\\') + 1; /* windows */ + return name; +} + +/*! exeNameMatch() : + @return : a non-zero value if exeName matches test, excluding the extension + */ +static int exeNameMatch(const char* exeName, const char* test) +{ + return !strncmp(exeName, test, strlen(test)) && /* `test` must be a prefix of exeName ... */ + (exeName[strlen(test)] == '\0' || exeName[strlen(test)] == '.'); /* ... immediately followed by the end of the string or by a '.' */ +} + +/*! readU32FromChar() : + * @return : unsigned integer value read from input in `char` format + * allows and interprets K, KB, KiB, M, MB and MiB suffix. + * Will also modify `*stringPtr`, advancing it to position where it stopped reading. + * Note : function result can overflow if digit string > MAX_UINT */ +static unsigned readU32FromChar(const char** stringPtr) +{ + unsigned result = 0; + while ((**stringPtr >='0') && (**stringPtr <='9')) { /* accumulate leading decimal digits ; result stays 0 when there are none */ + result *= 10; + result += (unsigned)(**stringPtr - '0'); + (*stringPtr)++ ; + } + if ((**stringPtr=='K') || (**stringPtr=='M')) { + result <<= 10; /* K : multiply by 1024 */ + if (**stringPtr=='M') result <<= 10; /* M : multiply by 1024 once more */ + (*stringPtr)++ ; + if (**stringPtr=='i') (*stringPtr)++; /* an optional 'i' and an optional 'B' are skipped ; they do not change the value */ + if (**stringPtr=='B') (*stringPtr)++; + } + return result; +} + +/** longCommandWArg() : + * check if *stringPtr is the same as longCommand. 
+ * If yes, @return 1 and advances *stringPtr to the position which immediately follows longCommand. + * @return 0 and doesn't modify *stringPtr otherwise. + */ +static int longCommandWArg(const char** stringPtr, const char* longCommand) +{ + size_t const comSize = strlen(longCommand); + int const result = !strncmp(*stringPtr, longCommand, comSize); /* prefix comparison : only the first comSize characters of *stringPtr are checked */ + if (result) *stringPtr += comSize; + return result; +} +/* operationMode_e : operation selected for the run ; main() starts from om_auto */ +typedef enum { om_auto, om_compress, om_decompress, om_test, om_bench, om_list } operationMode_e; + +/** determineOpMode() : + * auto-determine operation mode, based on input filename extension + * @return `om_decompress` if input filename has .lz4 extension and `om_compress` otherwise. + */ +static operationMode_e determineOpMode(const char* inputFilename) +{ + size_t const inSize = strlen(inputFilename); + size_t const extSize = strlen(LZ4_EXTENSION); + size_t const extStart= (inSize > extSize) ? inSize-extSize : 0; /* offset of the last extSize characters ; 0 (compare the whole name) when the name is not longer than the extension */ + if (!strcmp(inputFilename+extStart, LZ4_EXTENSION)) return om_decompress; + else return om_compress; +} + +int main(int argc, const char** argv) +{ + int i, + cLevel=1, + cLevelLast=-10000, + legacy_format=0, + forceStdout=0, + main_pause=0, + multiple_inputs=0, + all_arguments_are_files=0, + operationResult=0; + operationMode_e mode = om_auto; + const char* input_filename = NULL; + const char* output_filename= NULL; + const char* dictionary_filename = NULL; + char* dynNameSpace = NULL; + const char** inFileNames = (const char**)calloc((size_t)argc, sizeof(char*)); + unsigned ifnIdx=0; + LZ4IO_prefs_t* const prefs = LZ4IO_defaultPreferences(); + const char nullOutput[] = NULL_OUTPUT; + const char extension[] = LZ4_EXTENSION; + size_t blockSize = LZ4IO_setBlockSizeID(prefs, LZ4_BLOCKSIZEID_DEFAULT); + const char* const exeName = lastNameFromPath(argv[0]); +#ifdef UTIL_HAS_CREATEFILELIST + const char** extendedFileList = NULL; + char* fileNamesBuf = NULL; + unsigned fileNamesNb, recursive=0; +#endif + + /* Init */ + if (inFileNames==NULL) { + 
DISPLAY("Allocation error : not enough memory \n"); + return 1; + } + inFileNames[0] = stdinmark; + LZ4IO_setOverwrite(prefs, 0); + + /* predefined behaviors, based on binary/link name */ + if (exeNameMatch(exeName, LZ4CAT)) { + mode = om_decompress; + LZ4IO_setOverwrite(prefs, 1); + LZ4IO_setPassThrough(prefs, 1); + LZ4IO_setRemoveSrcFile(prefs, 0); + forceStdout=1; + output_filename=stdoutmark; + displayLevel=1; + multiple_inputs=1; + } + if (exeNameMatch(exeName, UNLZ4)) { mode = om_decompress; } + if (exeNameMatch(exeName, LZ4_LEGACY)) { g_lz4c_legacy_commands=1; } + + /* command switches */ + for(i=1; i ='0') && (*argument<='9')) { + cLevel = (int)readU32FromChar(&argument); + argument--; + continue; + } + + + switch(argument[0]) + { + /* Display help */ + case 'V': DISPLAYOUT(WELCOME_MESSAGE); goto _cleanup; /* Version */ + case 'h': usage_advanced(exeName); goto _cleanup; + case 'H': usage_longhelp(exeName); goto _cleanup; + + case 'e': + argument++; + cLevelLast = (int)readU32FromChar(&argument); + argument--; + break; + + /* Compression (default) */ + case 'z': mode = om_compress; break; + + case 'D': + if (argument[1] == '\0') { + /* path is next arg */ + if (i + 1 == argc) { + /* there is no next arg */ + badusage(exeName); + } + dictionary_filename = argv[++i]; + } else { + /* path follows immediately */ + dictionary_filename = argument + 1; + } + /* skip to end of argument so that we jump to parsing next argument */ + argument += strlen(argument) - 1; + break; + + /* Use Legacy format (ex : Linux kernel compression) */ + case 'l': legacy_format = 1; blockSize = 8 MB; break; + + /* Decoding */ + case 'd': mode = om_decompress; break; + + /* Force stdout, even if stdout==console */ + case 'c': + forceStdout=1; + output_filename=stdoutmark; + LZ4IO_setPassThrough(prefs, 1); + break; + + /* Test integrity */ + case 't': mode = om_test; break; + + /* Overwrite */ + case 'f': LZ4IO_setOverwrite(prefs, 1); break; + + /* Verbose mode */ + case 'v': 
displayLevel++; break; + + /* Quiet mode */ + case 'q': if (displayLevel) displayLevel--; break; + + /* keep source file (default anyway, so useless) (for xz/lzma compatibility) */ + case 'k': LZ4IO_setRemoveSrcFile(prefs, 0); break; + + /* Modify Block Properties */ + case 'B': + while (argument[1]!=0) { + int exitBlockProperties=0; + switch(argument[1]) + { + case 'D': LZ4IO_setBlockMode(prefs, LZ4IO_blockLinked); argument++; break; + case 'I': LZ4IO_setBlockMode(prefs, LZ4IO_blockIndependent); argument++; break; + case 'X': LZ4IO_setBlockChecksumMode(prefs, 1); argument ++; break; /* disabled by default */ + default : + if (argument[1] < '0' || argument[1] > '9') { + exitBlockProperties=1; + break; + } else { + unsigned B; + argument++; + B = readU32FromChar(&argument); + argument--; + if (B < 4) badusage(exeName); + if (B <= 7) { + blockSize = LZ4IO_setBlockSizeID(prefs, B); + BMK_setBlockSize(blockSize); + DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); + } else { + if (B < 32) badusage(exeName); + blockSize = LZ4IO_setBlockSize(prefs, B); + BMK_setBlockSize(blockSize); + if (blockSize >= 1024) { + DISPLAYLEVEL(2, "using blocks of size %u KB \n", (U32)(blockSize>>10)); + } else { + DISPLAYLEVEL(2, "using blocks of size %u bytes \n", (U32)(blockSize)); + } + } + break; + } + } + if (exitBlockProperties) break; + } + break; + + /* Benchmark */ + case 'b': mode = om_bench; multiple_inputs=1; + break; + + /* hidden command : benchmark files, but do not fuse result */ + case 'S': BMK_setBenchSeparately(1); + break; + +#ifdef UTIL_HAS_CREATEFILELIST + /* recursive */ + case 'r': recursive=1; +#endif + /* fall-through */ + /* Treat non-option args as input files. 
See https://code.google.com/p/lz4/issues/detail?id=151 */ + case 'm': multiple_inputs=1; + break; + + /* Modify Nb Seconds (benchmark only) */ + case 'i': + { unsigned iters; + argument++; + iters = readU32FromChar(&argument); + argument--; + BMK_setNotificationLevel(displayLevel); + BMK_setNbSeconds(iters); /* notification if displayLevel >= 3 */ + } + break; + + /* Pause at the end (hidden option) */ + case 'p': main_pause=1; break; + + /* Unrecognised command */ + default : badusage(exeName); + } + } + continue; + } + + /* Store in *inFileNames[] if -m is used. */ + if (multiple_inputs) { inFileNames[ifnIdx++]=argument; continue; } + + /* Store first non-option arg in input_filename to preserve original cli logic. */ + if (!input_filename) { input_filename=argument; continue; } + + /* Second non-option arg in output_filename to preserve original cli logic. */ + if (!output_filename) { + output_filename=argument; + if (!strcmp (output_filename, nullOutput)) output_filename = nulmark; + continue; + } + + /* 3rd non-option arg should not exist */ + DISPLAYLEVEL(1, "Warning : %s won't be used ! Do you want multiple input files (-m) ? 
\n", argument); + } + + DISPLAYLEVEL(3, WELCOME_MESSAGE); +#ifdef _POSIX_C_SOURCE + DISPLAYLEVEL(4, "_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE); +#endif +#ifdef _POSIX_VERSION + DISPLAYLEVEL(4, "_POSIX_VERSION defined: %ldL\n", (long) _POSIX_VERSION); +#endif +#ifdef PLATFORM_POSIX_VERSION + DISPLAYLEVEL(4, "PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION); +#endif +#ifdef _FILE_OFFSET_BITS + DISPLAYLEVEL(4, "_FILE_OFFSET_BITS defined: %ldL\n", (long) _FILE_OFFSET_BITS); +#endif + if ((mode == om_compress) || (mode == om_bench)) + DISPLAYLEVEL(4, "Blocks size : %u KB\n", (U32)(blockSize>>10)); + + if (multiple_inputs) { + input_filename = inFileNames[0]; +#ifdef UTIL_HAS_CREATEFILELIST + if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */ + extendedFileList = UTIL_createFileList(inFileNames, ifnIdx, &fileNamesBuf, &fileNamesNb); + if (extendedFileList) { + unsigned u; + for (u=0; u try to select one automatically (when possible) */ + while ((!output_filename) && (multiple_inputs==0)) { + if (!IS_CONSOLE(stdout) && mode != om_list) { + /* Default to stdout whenever stdout is not the console. + * Note : this policy may change in the future, therefore don't rely on it ! + * To ensure `stdout` is explicitly selected, use `-c` command flag. + * Conversely, to ensure output will not become `stdout`, use `-m` command flag */ + DISPLAYLEVEL(1, "Warning : using stdout as default output. Do not rely on this behavior: use explicit `-c` instead ! 
\n"); + output_filename=stdoutmark; + break; + } + if (mode == om_auto) { /* auto-determine compression or decompression, based on file extension */ + mode = determineOpMode(input_filename); + } + if (mode == om_compress) { /* compression to file */ + size_t const l = strlen(input_filename); + dynNameSpace = (char*)calloc(1,l+5); + if (dynNameSpace==NULL) { perror(exeName); exit(1); } + strcpy(dynNameSpace, input_filename); + strcat(dynNameSpace, LZ4_EXTENSION); + output_filename = dynNameSpace; + DISPLAYLEVEL(2, "Compressed filename will be : %s \n", output_filename); + break; + } + if (mode == om_decompress) {/* decompression to file (automatic name will work only if input filename has correct format extension) */ + size_t outl; + size_t const inl = strlen(input_filename); + dynNameSpace = (char*)calloc(1,inl+1); + if (dynNameSpace==NULL) { perror(exeName); exit(1); } + strcpy(dynNameSpace, input_filename); + outl = inl; + if (inl>4) + while ((outl >= inl-4) && (input_filename[outl] == extension[outl-inl+4])) dynNameSpace[outl--]=0; + if (outl != inl-5) { DISPLAYLEVEL(1, "Cannot determine an output filename\n"); badusage(exeName); } + output_filename = dynNameSpace; + DISPLAYLEVEL(2, "Decoding file %s \n", output_filename); + } + break; + } + + if (mode == om_list){ + /* Exit if trying to read from stdin as this isn't supported in this mode */ + if(!strcmp(input_filename, stdinmark)){ + DISPLAYLEVEL(1, "refusing to read from standard input in --list mode\n"); + exit(1); + } + if(!multiple_inputs){ + inFileNames[ifnIdx++] = input_filename; + } + } + else{ + if (multiple_inputs==0) assert(output_filename); + } + /* when multiple_inputs==1, output_filename may simply be useless, + * however, output_filename must be !NULL for next strcmp() tests */ + if (!output_filename) output_filename = "*\\dummy^!//"; + + /* Check if output is defined as console; trigger an error in this case */ + if (!strcmp(output_filename,stdoutmark) && IS_CONSOLE(stdout) && !forceStdout) { + 
DISPLAYLEVEL(1, "refusing to write to console without -c \n"); + exit(1); + } + /* Downgrade notification level in stdout and multiple file mode */ + if (!strcmp(output_filename,stdoutmark) && (displayLevel==2)) displayLevel=1; + if ((multiple_inputs) && (displayLevel==2)) displayLevel=1; + + /* Auto-determine compression or decompression, based on file extension */ + if (mode == om_auto) { + mode = determineOpMode(input_filename); + } + + /* IO Stream/File : every branch below stores its status in operationResult, which becomes the exit code */ + LZ4IO_setNotificationLevel((int)displayLevel); + if (ifnIdx == 0) multiple_inputs = 0; /* no name was collected : fall back to single-file handling */ + if (mode == om_decompress) { + if (multiple_inputs) { + const char* const dec_extension = !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION; + assert(ifnIdx <= INT_MAX); + operationResult = LZ4IO_decompressMultipleFilenames(inFileNames, (int)ifnIdx, dec_extension, prefs); + } else { + operationResult = DEFAULT_DECOMPRESSOR(input_filename, output_filename, prefs); + } + } else if (mode == om_list){ + operationResult = LZ4IO_displayCompressedFilesInfo(inFileNames, ifnIdx); + } else { /* compression is default action */ + if (legacy_format) { + DISPLAYLEVEL(3, "! Generating LZ4 Legacy format (deprecated) ! \n"); + if(multiple_inputs){ + const char* const leg_extension = !strcmp(output_filename,stdoutmark) ? stdoutmark : LZ4_EXTENSION; + operationResult = LZ4IO_compressMultipleFilenames_Legacy(inFileNames, (int)ifnIdx, leg_extension, cLevel, prefs); /* forward the missed-files count instead of dropping it */ + } else { + operationResult = LZ4IO_compressFilename_Legacy(input_filename, output_filename, cLevel, prefs); /* keep the exit code consistent with the non-legacy path */ + } + } else { + if (multiple_inputs) { + const char* const comp_extension = !strcmp(output_filename,stdoutmark) ? 
stdoutmark : LZ4_EXTENSION; + assert(ifnIdx <= INT_MAX); + operationResult = LZ4IO_compressMultipleFilenames(inFileNames, (int)ifnIdx, comp_extension, cLevel, prefs); + } else { + operationResult = DEFAULT_COMPRESSOR(input_filename, output_filename, cLevel, prefs); + } } } + +_cleanup: + if (main_pause) waitEnter(); + free(dynNameSpace); +#ifdef UTIL_HAS_CREATEFILELIST + if (extendedFileList) { + UTIL_freeFileList(extendedFileList, fileNamesBuf); + inFileNames = NULL; + } +#endif + LZ4IO_freePreferences(prefs); + free((void*)inFileNames); + return operationResult; +} diff --git a/programs/lz4io.c b/programs/lz4io.c new file mode 100644 index 0000000..a274798 --- /dev/null +++ b/programs/lz4io.c @@ -0,0 +1,1677 @@ +/* + LZ4io.c - LZ4 File/Stream Interface + Copyright (C) Yann Collet 2011-2017 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* + Note : this is stand-alone program. + It is not part of LZ4 compression library, it is a user code of the LZ4 library. + - The license of LZ4 library is BSD. + - The license of xxHash library is BSD. + - The license of this source file is GPLv2. 
+*/ + + +/*-************************************ +* Compiler options +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +#endif +#if defined(__MINGW32__) && !defined(_POSIX_SOURCE) +# define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */ +#endif + + +/***************************** +* Includes +*****************************/ +#include "platform.h" /* Large File Support, SET_BINARY_MODE, SET_SPARSE_FILE_MODE, PLATFORM_POSIX_VERSION, __64BIT__ */ +#include "util.h" /* UTIL_getFileStat, UTIL_setFileStat */ +#include /* fprintf, fopen, fread, stdin, stdout, fflush, getchar */ +#include /* malloc, free */ +#include /* strerror, strcmp, strlen */ +#include /* clock */ +#include /* stat64 */ +#include /* stat64 */ +#include "lz4.h" /* still required for legacy format */ +#include "lz4hc.h" /* still required for legacy format */ +#define LZ4F_STATIC_LINKING_ONLY +#include "lz4frame.h" +#include "lz4io.h" + + +/***************************** +* Constants +*****************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define _1BIT 0x01 +#define _2BITS 0x03 +#define _3BITS 0x07 +#define _4BITS 0x0F +#define _8BITS 0xFF + +#define MAGICNUMBER_SIZE 4 +#define LZ4IO_MAGICNUMBER 0x184D2204 +#define LZ4IO_SKIPPABLE0 0x184D2A50 +#define LZ4IO_SKIPPABLEMASK 0xFFFFFFF0 +#define LEGACY_MAGICNUMBER 0x184C2102 + +#define CACHELINE 64 +#define LEGACY_BLOCKSIZE (8 MB) +#define MIN_STREAM_BUFSIZE (192 KB) +#define LZ4IO_BLOCKSIZEID_DEFAULT 7 +#define LZ4_MAX_DICT_SIZE (64 KB) + + +/************************************** +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) 
if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static int g_displayLevel = 0; /* 0 : no display ; 1: errors ; 2 : + result + interaction + warnings ; 3 : + progression; 4 : + information */ + +#define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \ + if ( ((clock() - g_time) > refreshRate) \ + || (g_displayLevel>=4) ) { \ + g_time = clock(); \ + DISPLAY(__VA_ARGS__); \ + if (g_displayLevel>=4) fflush(stderr); \ + } } +static const clock_t refreshRate = CLOCKS_PER_SEC / 6; +static clock_t g_time = 0; +#define LZ4IO_STATIC_ASSERT(c) { enum { LZ4IO_static_assert = 1/(int)(!!(c)) }; } /* use after variable declarations */ + + +/************************************** +* Local Parameters +**************************************/ + +struct LZ4IO_prefs_s { + int passThrough; + int overwrite; + int testMode; + int blockSizeId; + size_t blockSize; + int blockChecksum; + int streamChecksum; + int blockIndependence; + int sparseFileSupport; + int contentSizeFlag; + int useDictionary; + unsigned favorDecSpeed; + const char* dictionaryFilename; + int removeSrcFile; +}; + +/************************************** +* Exceptions +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif +#define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); +#define EXM_THROW(error, ...) 
\ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + exit(error); \ +} + + +/************************************** +* Version modifiers +**************************************/ +#define EXTENDED_ARGUMENTS +#define EXTENDED_HELP +#define EXTENDED_FORMAT +#define DEFAULT_DECOMPRESSOR LZ4IO_decompressLZ4F + + +/* ************************************************** */ +/* ****************** Parameters ******************** */ +/* ************************************************** */ + +LZ4IO_prefs_t* LZ4IO_defaultPreferences(void) +{ + LZ4IO_prefs_t* const ret = (LZ4IO_prefs_t*)malloc(sizeof(*ret)); + if (!ret) EXM_THROW(21, "Allocation error : not enough memory"); + ret->passThrough = 0; + ret->overwrite = 1; + ret->testMode = 0; + ret->blockSizeId = LZ4IO_BLOCKSIZEID_DEFAULT; + ret->blockSize = 0; + ret->blockChecksum = 0; + ret->streamChecksum = 1; + ret->blockIndependence = 1; + ret->sparseFileSupport = 1; + ret->contentSizeFlag = 0; + ret->useDictionary = 0; + ret->favorDecSpeed = 0; + ret->dictionaryFilename = NULL; + ret->removeSrcFile = 0; + return ret; +} + +void LZ4IO_freePreferences(LZ4IO_prefs_t* prefs) +{ + free(prefs); +} + + +int LZ4IO_setDictionaryFilename(LZ4IO_prefs_t* const prefs, const char* dictionaryFilename) +{ + prefs->dictionaryFilename = dictionaryFilename; + prefs->useDictionary = dictionaryFilename != NULL; + return prefs->useDictionary; +} + +/* Default setting : passThrough = 0; return : passThrough mode (0/1) */ +int LZ4IO_setPassThrough(LZ4IO_prefs_t* const prefs, int yes) +{ + prefs->passThrough = (yes!=0); + return prefs->passThrough; +} + + +/* Default setting : overwrite = 1; return : overwrite mode (0/1) */ +int LZ4IO_setOverwrite(LZ4IO_prefs_t* const prefs, int yes) +{ + prefs->overwrite = (yes!=0); + return prefs->overwrite; +} + +/* Default setting : testMode = 0; return : testMode (0/1) */ +int 
LZ4IO_setTestMode(LZ4IO_prefs_t* const prefs, int yes) +{ + prefs->testMode = (yes!=0); /* any non-zero value is stored as 1 */ + return prefs->testMode; +} + +/* LZ4IO_setBlockSizeID() : + * blockSizeID : valid values : 4-5-6-7, standing for blocks of 64 KB, 256 KB, 1 MB and 4 MB. + * Stores both the ID and the matching size into prefs. + * @return : the selected block size in bytes, or 0 (prefs left unchanged) when bsid is out of range */ +size_t LZ4IO_setBlockSizeID(LZ4IO_prefs_t* const prefs, unsigned bsid) +{ + static const size_t blockSizeTable[] = { 64 KB, 256 KB, 1 MB, 4 MB }; + static const unsigned minBlockSizeID = 4; + static const unsigned maxBlockSizeID = 7; + if ((bsid < minBlockSizeID) || (bsid > maxBlockSizeID)) return 0; + prefs->blockSizeId = (int)bsid; + prefs->blockSize = blockSizeTable[(unsigned)prefs->blockSizeId-minBlockSizeID]; /* table index 0 <=> ID 4 */ + return prefs->blockSize; +} + +/* LZ4IO_setBlockSize() : + * Clamps blockSize to [32 bytes, 4 MB], stores it in prefs, + * and derives the block size ID (4-7) stored alongside it. + * @return : the block size actually stored (after clamping) */ +size_t LZ4IO_setBlockSize(LZ4IO_prefs_t* const prefs, size_t blockSize) +{ + static const size_t minBlockSize = 32; + static const size_t maxBlockSize = 4 MB; + unsigned bsid = 0; + if (blockSize < minBlockSize) blockSize = minBlockSize; + if (blockSize > maxBlockSize) blockSize = maxBlockSize; + prefs->blockSize = blockSize; + blockSize--; /* so that an exact power of 4 maps to its own ID rather than the next one */ + /* find the smallest of { 64k, 256k, 1MB, 4MB } able to hold blockSize : + * each 2-bit shift divides by 4, so the loop leaves bsid == floor(log4(blockSize-1)) ; + * 7 corresponds to 64 KB (ID 4), hence the lower bound and the -3 offset below */ + while (blockSize >>= 2) + bsid++; + if (bsid < 7) bsid = 7; + prefs->blockSizeId = (int)(bsid-3); + return prefs->blockSize; +} + +/* Default setting : 1 == independent blocks + * @return : the stored flag, 1 when blockMode is LZ4IO_blockIndependent, 0 otherwise */ +int LZ4IO_setBlockMode(LZ4IO_prefs_t* const prefs, LZ4IO_blockMode_t blockMode) +{ + prefs->blockIndependence = (blockMode == LZ4IO_blockIndependent); + return prefs->blockIndependence; +} + +/* Default setting : 0 == no block checksum + * @return : the stored flag (0/1) */ +int LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t* const prefs, int enable) +{ + prefs->blockChecksum = (enable != 0); + return prefs->blockChecksum; +} + +/* Default setting : 1 == checksum enabled + * @return : the stored flag (0/1) */ +int LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t* const prefs, int enable) +{ + prefs->streamChecksum = (enable != 0); + return prefs->streamChecksum; +} + +/* Default setting : 0 (no notification) + * Sets the file-scope g_displayLevel read by the DISPLAYLEVEL / DISPLAYUPDATE macros. + * @return : the new level */ +int LZ4IO_setNotificationLevel(int level) +{ + g_displayLevel = level; + return g_displayLevel; +} + +/* Default setting : 1 (auto: enabled on file, 
disabled on stdout) */ +int LZ4IO_setSparseFile(LZ4IO_prefs_t* const prefs, int enable) +{ + prefs->sparseFileSupport = 2*(enable!=0); /* 2==force enable ; 0==disabled */ + return prefs->sparseFileSupport; +} + +/* Default setting : 0 (disabled) + * @return : the stored flag (0/1) */ +int LZ4IO_setContentSize(LZ4IO_prefs_t* const prefs, int enable) +{ + prefs->contentSizeFlag = (enable!=0); + return prefs->contentSizeFlag; +} + +/* Default setting : 0 (disabled) */ +void LZ4IO_favorDecSpeed(LZ4IO_prefs_t* const prefs, int favor) +{ + prefs->favorDecSpeed = (favor!=0); +} + +/* Default setting : 0 (source file is kept) ; any non-zero flag is stored as 1 */ +void LZ4IO_setRemoveSrcFile(LZ4IO_prefs_t* const prefs, unsigned flag) +{ + prefs->removeSrcFile = (flag>0); +} + + + +/* ************************************************************************ ** +** ********************** LZ4 File / Pipe compression ********************* ** +** ************************************************************************ */ + +/* @return : 1 when the 4 low bits of magic, once masked out, leave LZ4IO_SKIPPABLE0 ; 0 otherwise */ +static int LZ4IO_isSkippableMagicNumber(unsigned int magic) { + return (magic & LZ4IO_SKIPPABLEMASK) == LZ4IO_SKIPPABLE0; +} + + +/** LZ4IO_openSrcFile() : + * condition : `srcFileName` must be non-NULL. + * @result : FILE* to `srcFileName` (stdin when it equals stdinmark), or NULL if it fails */ +static FILE* LZ4IO_openSrcFile(const char* srcFileName) +{ + FILE* f; + + if (!strcmp (srcFileName, stdinmark)) { + DISPLAYLEVEL(4,"Using stdin for input\n"); + f = stdin; + SET_BINARY_MODE(stdin); + } else { + f = fopen(srcFileName, "rb"); + if ( f==NULL ) DISPLAYLEVEL(1, "%s: %s \n", srcFileName, strerror(errno)); + } + + return f; +} + +/** LZ4IO_openDstFile() : + * prefs is only read here (overwrite and sparseFileSupport fields). + * condition : `dstFileName` must be non-NULL. 
+ * @result : FILE* to `dstFileName` (stdout when it equals stdoutmark), or NULL if it fails or if an existing file is not overwritten */ +static FILE* LZ4IO_openDstFile(const char* dstFileName, const LZ4IO_prefs_t* const prefs) +{ + FILE* f; + assert(dstFileName != NULL); + + if (!strcmp (dstFileName, stdoutmark)) { + DISPLAYLEVEL(4, "Using stdout for output \n"); + f = stdout; + SET_BINARY_MODE(stdout); + if (prefs->sparseFileSupport==1) { + DISPLAYLEVEL(4, "Sparse File Support automatically disabled on stdout ;" + " to force-enable it, add --sparse command \n"); + } + } else { + if (!prefs->overwrite && strcmp (dstFileName, nulmark)) { /* Check if destination file already exists */ + FILE* const testf = fopen( dstFileName, "rb" ); + if (testf != NULL) { /* dest exists, prompt for overwrite authorization */ + fclose(testf); + if (g_displayLevel <= 1) { /* No interaction possible */ + DISPLAY("%s already exists; not overwritten \n", dstFileName); + return NULL; + } + DISPLAY("%s already exists; do you wish to overwrite (y/N) ? ", dstFileName); + { int ch = getchar(); /* only the first character of the answer decides */ + if ((ch!='Y') && (ch!='y')) { + DISPLAY(" not overwritten \n"); + return NULL; + } + while ((ch!=EOF) && (ch!='\n')) ch = getchar(); /* flush rest of input line */ + } } } + f = fopen( dstFileName, "wb" ); + if (f==NULL) DISPLAYLEVEL(1, "%s: %s\n", dstFileName, strerror(errno)); + } + + /* sparse file : auto mode (1) is cancelled when writing to stdout, forced mode (2) is not */ + { int const sparseMode = (prefs->sparseFileSupport - (f==stdout)) > 0; + if (f && sparseMode) { SET_SPARSE_FILE_MODE(f); } + } + + return f; +} + + + +/*************************************** +* Legacy Compression +***************************************/ + +/* unoptimized version; solves endianess & alignment issues : + * stores value32 at p as 4 bytes, least significant byte first */ +static void LZ4IO_writeLE32 (void* p, unsigned value32) +{ + unsigned char* const dstPtr = (unsigned char*)p; + dstPtr[0] = (unsigned char)value32; + dstPtr[1] = (unsigned char)(value32 >> 8); + dstPtr[2] = (unsigned char)(value32 >> 16); + dstPtr[3] = (unsigned char)(value32 >> 24); +} + +static int LZ4IO_LZ4_compress(const char* src, char* dst, int 
srcSize, int dstSize, int cLevel) +{ + (void)cLevel; + return LZ4_compress_fast(src, dst, srcSize, dstSize, 1); +} + +/* LZ4IO_compressFilename_Legacy : + * This function is intentionally "hidden" (not published in .h) + * It generates compressed streams using the old 'legacy' format */ +int LZ4IO_compressFilename_Legacy(const char* input_filename, const char* output_filename, + int compressionlevel, const LZ4IO_prefs_t* prefs) +{ + typedef int (*compress_f)(const char* src, char* dst, int srcSize, int dstSize, int cLevel); + compress_f const compressionFunction = (compressionlevel < 3) ? LZ4IO_LZ4_compress : LZ4_compress_HC; + unsigned long long filesize = 0; + unsigned long long compressedfilesize = MAGICNUMBER_SIZE; + char* in_buff; + char* out_buff; + const int outBuffSize = LZ4_compressBound(LEGACY_BLOCKSIZE); + FILE* const finput = LZ4IO_openSrcFile(input_filename); + FILE* foutput; + clock_t clockEnd; + + /* Init */ + clock_t const clockStart = clock(); + if (finput == NULL) + EXM_THROW(20, "%s : open file error ", input_filename); + + foutput = LZ4IO_openDstFile(output_filename, prefs); + if (foutput == NULL) { + fclose(finput); + EXM_THROW(20, "%s : open file error ", input_filename); + } + + /* Allocate Memory */ + in_buff = (char*)malloc(LEGACY_BLOCKSIZE); + out_buff = (char*)malloc((size_t)outBuffSize + 4); + if (!in_buff || !out_buff) + EXM_THROW(21, "Allocation error : not enough memory"); + + /* Write Archive Header */ + LZ4IO_writeLE32(out_buff, LEGACY_MAGICNUMBER); + if (fwrite(out_buff, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE) + EXM_THROW(22, "Write error : cannot write header"); + + /* Main Loop */ + while (1) { + int outSize; + /* Read Block */ + size_t const inSize = fread(in_buff, (size_t)1, (size_t)LEGACY_BLOCKSIZE, finput); + if (inSize == 0) break; + assert(inSize <= LEGACY_BLOCKSIZE); + filesize += inSize; + + /* Compress Block */ + outSize = compressionFunction(in_buff, out_buff+4, (int)inSize, outBuffSize, compressionlevel); + 
assert(outSize >= 0); + compressedfilesize += (unsigned long long)outSize+4; + DISPLAYUPDATE(2, "\rRead : %i MB ==> %.2f%% ", + (int)(filesize>>20), (double)compressedfilesize/filesize*100); + + /* Write Block */ + assert(outSize > 0); + assert(outSize < outBuffSize); + LZ4IO_writeLE32(out_buff, (unsigned)outSize); + if (fwrite(out_buff, 1, (size_t)outSize+4, foutput) != (size_t)(outSize+4)) { + EXM_THROW(24, "Write error : cannot write compressed block"); + } } + if (ferror(finput)) EXM_THROW(25, "Error while reading %s ", input_filename); + + /* Status */ + clockEnd = clock(); + if (clockEnd==clockStart) clockEnd+=1; /* avoid division by zero (speed) */ + filesize += !filesize; /* avoid division by zero (ratio) */ + DISPLAYLEVEL(2, "\r%79s\r", ""); /* blank line */ + DISPLAYLEVEL(2,"Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + filesize, compressedfilesize, (double)compressedfilesize / filesize * 100); + { double const seconds = (double)(clockEnd - clockStart) / CLOCKS_PER_SEC; + DISPLAYLEVEL(4,"Done in %.2f s ==> %.2f MB/s\n", seconds, + (double)filesize / seconds / 1024 / 1024); + } + + /* Close & Free */ + free(in_buff); + free(out_buff); + fclose(finput); + if (strcmp(output_filename,stdoutmark)) fclose(foutput); /* do not close stdout */ + + return 0; +} + +#define FNSPACE 30 +/* LZ4IO_compressMultipleFilenames_Legacy : + * This function is intentionally "hidden" (not published in .h) + * It generates multiple compressed streams using the old 'legacy' format */ +int LZ4IO_compressMultipleFilenames_Legacy( + const char** inFileNamesTable, int ifntSize, + const char* suffix, + int compressionLevel, const LZ4IO_prefs_t* prefs) +{ + int i; + int missed_files = 0; + char* dstFileName = (char*)malloc(FNSPACE); + size_t ofnSize = FNSPACE; + const size_t suffixSize = strlen(suffix); + + if (dstFileName == NULL) return ifntSize; /* not enough memory */ + + /* loop on each file */ + for (i=0; i 0); + + if (dictLen > LZ4_MAX_DICT_SIZE) { + dictLen = 
LZ4_MAX_DICT_SIZE; + } + + *dictSize = dictLen; + + dictStart = (circularBufSize + dictEnd - dictLen) % circularBufSize; + + if (dictStart == 0) { + /* We're in the simple case where the dict starts at the beginning of our circular buffer. */ + dictBuf = circularBuf; + circularBuf = NULL; + } else { + /* Otherwise, we will alloc a new buffer and copy our dict into that. */ + dictBuf = (char *)malloc(dictLen ? dictLen : 1); + if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory"); + + memcpy(dictBuf, circularBuf + dictStart, circularBufSize - dictStart); + memcpy(dictBuf + circularBufSize - dictStart, circularBuf, dictLen - (circularBufSize - dictStart)); + } + + fclose(dictFile); + free(circularBuf); + + return dictBuf; +} + +static LZ4F_CDict* LZ4IO_createCDict(const LZ4IO_prefs_t* const prefs) +{ + size_t dictionarySize; + void* dictionaryBuffer; + LZ4F_CDict* cdict; + if (!prefs->useDictionary) return NULL; + dictionaryBuffer = LZ4IO_createDict(&dictionarySize, prefs->dictionaryFilename); + if (!dictionaryBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary"); + cdict = LZ4F_createCDict(dictionaryBuffer, dictionarySize); + free(dictionaryBuffer); + return cdict; +} + +static cRess_t LZ4IO_createCResources(const LZ4IO_prefs_t* const prefs) +{ + const size_t blockSize = prefs->blockSize; + cRess_t ress; + + LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&(ress.ctx), LZ4F_VERSION); + if (LZ4F_isError(errorCode)) EXM_THROW(30, "Allocation error : can't create LZ4F context : %s", LZ4F_getErrorName(errorCode)); + + /* Allocate Memory */ + ress.srcBuffer = malloc(blockSize); + ress.srcBufferSize = blockSize; + ress.dstBufferSize = LZ4F_compressFrameBound(blockSize, NULL); /* cover worst case */ + ress.dstBuffer = malloc(ress.dstBufferSize); + if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(31, "Allocation error : not enough memory"); + + ress.cdict = LZ4IO_createCDict(prefs); + + return ress; +} + +static void 
LZ4IO_freeCResources(cRess_t ress) +{ + free(ress.srcBuffer); + free(ress.dstBuffer); + + LZ4F_freeCDict(ress.cdict); + ress.cdict = NULL; + + { LZ4F_errorCode_t const errorCode = LZ4F_freeCompressionContext(ress.ctx); + if (LZ4F_isError(errorCode)) EXM_THROW(38, "Error : can't free LZ4F context resource : %s", LZ4F_getErrorName(errorCode)); } +} + +/* + * LZ4IO_compressFilename_extRess() + * result : 0 : compression completed correctly + * 1 : missing or pb opening srcFileName + */ +static int +LZ4IO_compressFilename_extRess(cRess_t ress, + const char* srcFileName, const char* dstFileName, + int compressionLevel, const LZ4IO_prefs_t* const io_prefs) +{ + unsigned long long filesize = 0; + unsigned long long compressedfilesize = 0; + FILE* dstFile; + void* const srcBuffer = ress.srcBuffer; + void* const dstBuffer = ress.dstBuffer; + const size_t dstBufferSize = ress.dstBufferSize; + const size_t blockSize = io_prefs->blockSize; + size_t readSize; + LZ4F_compressionContext_t ctx = ress.ctx; /* just a pointer */ + LZ4F_preferences_t prefs; + + /* Init */ + FILE* const srcFile = LZ4IO_openSrcFile(srcFileName); + if (srcFile == NULL) return 1; + dstFile = LZ4IO_openDstFile(dstFileName, io_prefs); + if (dstFile == NULL) { fclose(srcFile); return 1; } + memset(&prefs, 0, sizeof(prefs)); + + /* Set compression parameters */ + prefs.autoFlush = 1; + prefs.compressionLevel = compressionLevel; + prefs.frameInfo.blockMode = (LZ4F_blockMode_t)io_prefs->blockIndependence; + prefs.frameInfo.blockSizeID = (LZ4F_blockSizeID_t)io_prefs->blockSizeId; + prefs.frameInfo.blockChecksumFlag = (LZ4F_blockChecksum_t)io_prefs->blockChecksum; + prefs.frameInfo.contentChecksumFlag = (LZ4F_contentChecksum_t)io_prefs->streamChecksum; + prefs.favorDecSpeed = io_prefs->favorDecSpeed; + if (io_prefs->contentSizeFlag) { + U64 const fileSize = UTIL_getOpenFileSize(srcFile); + prefs.frameInfo.contentSize = fileSize; /* == 0 if input == stdin */ + if (fileSize==0) + DISPLAYLEVEL(3, "Warning : cannot 
determine input content size \n"); + } + + /* read first block */ + readSize = fread(srcBuffer, (size_t)1, blockSize, srcFile); + if (ferror(srcFile)) EXM_THROW(30, "Error reading %s ", srcFileName); + filesize += readSize; + + /* single-block file */ + if (readSize < blockSize) { + /* Compress in single pass */ + size_t const cSize = LZ4F_compressFrame_usingCDict(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, ress.cdict, &prefs); + if (LZ4F_isError(cSize)) + EXM_THROW(31, "Compression failed : %s", LZ4F_getErrorName(cSize)); + compressedfilesize = cSize; + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", + (unsigned)(filesize>>20), (double)compressedfilesize/(filesize+!filesize)*100); /* avoid division by zero */ + + /* Write Block */ + if (fwrite(dstBuffer, 1, cSize, dstFile) != cSize) { + EXM_THROW(32, "Write error : failed writing single-block compressed frame"); + } } + + else + + /* multiple-blocks file */ + { + /* Write Frame Header */ + size_t const headerSize = LZ4F_compressBegin_usingCDict(ctx, dstBuffer, dstBufferSize, ress.cdict, &prefs); + if (LZ4F_isError(headerSize)) EXM_THROW(33, "File header generation failed : %s", LZ4F_getErrorName(headerSize)); + if (fwrite(dstBuffer, 1, headerSize, dstFile) != headerSize) + EXM_THROW(34, "Write error : cannot write header"); + compressedfilesize += headerSize; + + /* Main Loop - one block at a time */ + while (readSize>0) { + size_t const outSize = LZ4F_compressUpdate(ctx, dstBuffer, dstBufferSize, srcBuffer, readSize, NULL); + if (LZ4F_isError(outSize)) + EXM_THROW(35, "Compression failed : %s", LZ4F_getErrorName(outSize)); + compressedfilesize += outSize; + DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ", + (unsigned)(filesize>>20), (double)compressedfilesize/filesize*100); + + /* Write Block */ + if (fwrite(dstBuffer, 1, outSize, dstFile) != outSize) + EXM_THROW(36, "Write error : cannot write compressed block"); + + /* Read next block */ + readSize = fread(srcBuffer, (size_t)1, (size_t)blockSize, srcFile); 
+ filesize += readSize; + } + if (ferror(srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName); + + /* End of Frame mark */ + { size_t const endSize = LZ4F_compressEnd(ctx, dstBuffer, dstBufferSize, NULL); + if (LZ4F_isError(endSize)) + EXM_THROW(38, "End of frame error : %s", LZ4F_getErrorName(endSize)); + if (fwrite(dstBuffer, 1, endSize, dstFile) != endSize) + EXM_THROW(39, "Write error : cannot write end of frame"); + compressedfilesize += endSize; + } } + + /* Release file handlers */ + fclose (srcFile); + if (strcmp(dstFileName,stdoutmark)) fclose (dstFile); /* do not close stdout */ + + /* Copy owner, file permissions and modification time */ + { stat_t statbuf; + if (strcmp (srcFileName, stdinmark) + && strcmp (dstFileName, stdoutmark) + && strcmp (dstFileName, nulmark) + && UTIL_getFileStat(srcFileName, &statbuf)) { + UTIL_setFileStat(dstFileName, &statbuf); + } } + + if (io_prefs->removeSrcFile) { /* remove source file : --rm */ + if (remove(srcFileName)) + EXM_THROW(40, "Remove error : %s: %s", srcFileName, strerror(errno)); + } + + /* Final Status */ + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "Compressed %llu bytes into %llu bytes ==> %.2f%%\n", + filesize, compressedfilesize, + (double)compressedfilesize / (filesize + !filesize /* avoid division by zero */ ) * 100); + + return 0; +} + + +int LZ4IO_compressFilename(const char* srcFileName, const char* dstFileName, int compressionLevel, const LZ4IO_prefs_t* prefs) +{ + UTIL_time_t const timeStart = UTIL_getTime(); + clock_t const cpuStart = clock(); + cRess_t const ress = LZ4IO_createCResources(prefs); + + int const result = LZ4IO_compressFilename_extRess(ress, srcFileName, dstFileName, compressionLevel, prefs); + + /* Free resources */ + LZ4IO_freeCResources(ress); + + /* Final Status */ + { clock_t const cpuEnd = clock(); + double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC; + U64 const timeLength_ns = UTIL_clockSpanNano(timeStart); + double const timeLength_s = 
(double)timeLength_ns / 1000000000; + DISPLAYLEVEL(4, "Completed in %.2f sec (cpu load : %.0f%%)\n", + timeLength_s, (cpuLoad_s / timeLength_s) * 100); + } + + return result; +} + + +int LZ4IO_compressMultipleFilenames( + const char** inFileNamesTable, int ifntSize, + const char* suffix, + int compressionLevel, + const LZ4IO_prefs_t* prefs) +{ + int i; + int missed_files = 0; + char* dstFileName = (char*)malloc(FNSPACE); + size_t ofnSize = FNSPACE; + const size_t suffixSize = strlen(suffix); + cRess_t ress; + + if (dstFileName == NULL) return ifntSize; /* not enough memory */ + ress = LZ4IO_createCResources(prefs); + + /* loop on each file */ + for (i=0; i = 4 */ +static unsigned LZ4IO_readLE32 (const void* s) +{ + const unsigned char* const srcPtr = (const unsigned char*)s; + unsigned value32 = srcPtr[0]; + value32 += (unsigned)srcPtr[1] << 8; + value32 += (unsigned)srcPtr[2] << 16; + value32 += (unsigned)srcPtr[3] << 24; + return value32; +} + + +static unsigned +LZ4IO_fwriteSparse(FILE* file, + const void* buffer, size_t bufferSize, + int sparseFileSupport, + unsigned storedSkips) +{ + const size_t sizeT = sizeof(size_t); + const size_t maskT = sizeT -1 ; + const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */ + const size_t* ptrT = bufferT; + size_t bufferSizeT = bufferSize / sizeT; + const size_t* const bufferTEnd = bufferT + bufferSizeT; + const size_t segmentSizeT = (32 KB) / sizeT; + int const sparseMode = (sparseFileSupport - (file==stdout)) > 0; + + if (!sparseMode) { /* normal write */ + size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file); + if (sizeCheck != bufferSize) EXM_THROW(70, "Write error : cannot write decoded block"); + return 0; + } + + /* avoid int overflow */ + if (storedSkips > 1 GB) { + int const seekResult = UTIL_fseek(file, 1 GB, SEEK_CUR); + if (seekResult != 0) EXM_THROW(71, "1 GB skip error (sparse file support)"); + storedSkips -= 1 GB; + } + + while (ptrT < 
bufferTEnd) { + size_t seg0SizeT = segmentSizeT; + size_t nb0T; + + /* count leading zeros */ + if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT; + bufferSizeT -= seg0SizeT; + for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ; + storedSkips += (unsigned)(nb0T * sizeT); + + if (nb0T != seg0SizeT) { /* not all 0s */ + errno = 0; + { int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(72, "Sparse skip error(%d): %s ; try --no-sparse", (int)errno, strerror(errno)); + } + storedSkips = 0; + seg0SizeT -= nb0T; + ptrT += nb0T; + { size_t const sizeCheck = fwrite(ptrT, sizeT, seg0SizeT, file); + if (sizeCheck != seg0SizeT) EXM_THROW(73, "Write error : cannot write decoded block"); + } } + ptrT += seg0SizeT; + } + + if (bufferSize & maskT) { /* size not multiple of sizeT : implies end of block */ + const char* const restStart = (const char*)bufferTEnd; + const char* restPtr = restStart; + size_t const restSize = bufferSize & maskT; + const char* const restEnd = restStart + restSize; + for (; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ; + storedSkips += (unsigned) (restPtr - restStart); + if (restPtr != restEnd) { + int const seekResult = UTIL_fseek(file, storedSkips, SEEK_CUR); + if (seekResult) EXM_THROW(74, "Sparse skip error ; try --no-sparse"); + storedSkips = 0; + { size_t const sizeCheck = fwrite(restPtr, 1, (size_t)(restEnd - restPtr), file); + if (sizeCheck != (size_t)(restEnd - restPtr)) EXM_THROW(75, "Write error : cannot write decoded end of block"); + } } + } + + return storedSkips; +} + +static void LZ4IO_fwriteSparseEnd(FILE* file, unsigned storedSkips) +{ + if (storedSkips>0) { /* implies sparseFileSupport>0 */ + const char lastZeroByte[1] = { 0 }; + if (UTIL_fseek(file, storedSkips-1, SEEK_CUR) != 0) + EXM_THROW(69, "Final skip error (sparse file)\n"); + if (fwrite(lastZeroByte, 1, 1, file) != 1) + EXM_THROW(69, "Write error : cannot write last zero\n"); + } +} + + +static unsigned g_magicRead 
= 0; /* out-parameter of LZ4IO_decodeLegacyStream() */ +static unsigned long long LZ4IO_decodeLegacyStream(FILE* finput, FILE* foutput, const LZ4IO_prefs_t* prefs) +{ + unsigned long long streamSize = 0; + unsigned storedSkips = 0; + + /* Allocate Memory */ + char* const in_buff = (char*)malloc((size_t)LZ4_compressBound(LEGACY_BLOCKSIZE)); + char* const out_buff = (char*)malloc(LEGACY_BLOCKSIZE); + if (!in_buff || !out_buff) EXM_THROW(51, "Allocation error : not enough memory"); + + /* Main Loop */ + while (1) { + unsigned int blockSize; + + /* Block Size */ + { size_t const sizeCheck = fread(in_buff, 1, 4, finput); + if (sizeCheck == 0) break; /* Nothing to read : file read is completed */ + if (sizeCheck != 4) EXM_THROW(52, "Read error : cannot access block size "); } + blockSize = LZ4IO_readLE32(in_buff); /* Convert to Little Endian */ + if (blockSize > LZ4_COMPRESSBOUND(LEGACY_BLOCKSIZE)) { + /* Cannot read next block : maybe new stream ? */ + g_magicRead = blockSize; + break; + } + + /* Read Block */ + { size_t const sizeCheck = fread(in_buff, 1, blockSize, finput); + if (sizeCheck!=blockSize) EXM_THROW(52, "Read error : cannot access compressed block !"); } + + /* Decode Block */ + { int const decodeSize = LZ4_decompress_safe(in_buff, out_buff, (int)blockSize, LEGACY_BLOCKSIZE); + if (decodeSize < 0) EXM_THROW(53, "Decoding Failed ! 
Corrupted input detected !"); + streamSize += (unsigned long long)decodeSize; + /* Write Block */ + storedSkips = LZ4IO_fwriteSparse(foutput, out_buff, (size_t)decodeSize, prefs->sparseFileSupport, storedSkips); /* success or die */ + } } + if (ferror(finput)) EXM_THROW(54, "Read error : ferror"); + + LZ4IO_fwriteSparseEnd(foutput, storedSkips); + + /* Free */ + free(in_buff); + free(out_buff); + + return streamSize; +} + + + +typedef struct { + void* srcBuffer; + size_t srcBufferSize; + void* dstBuffer; + size_t dstBufferSize; + FILE* dstFile; + LZ4F_decompressionContext_t dCtx; + void* dictBuffer; + size_t dictBufferSize; +} dRess_t; + +static void LZ4IO_loadDDict(dRess_t* ress, const LZ4IO_prefs_t* const prefs) +{ + if (!prefs->useDictionary) { + ress->dictBuffer = NULL; + ress->dictBufferSize = 0; + return; + } + + ress->dictBuffer = LZ4IO_createDict(&ress->dictBufferSize, prefs->dictionaryFilename); + if (!ress->dictBuffer) EXM_THROW(25, "Dictionary error : could not create dictionary"); +} + +static const size_t LZ4IO_dBufferSize = 64 KB; +static dRess_t LZ4IO_createDResources(const LZ4IO_prefs_t* const prefs) +{ + dRess_t ress; + + /* init */ + LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&ress.dCtx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) EXM_THROW(60, "Can't create LZ4F context : %s", LZ4F_getErrorName(errorCode)); + + /* Allocate Memory */ + ress.srcBufferSize = LZ4IO_dBufferSize; + ress.srcBuffer = malloc(ress.srcBufferSize); + ress.dstBufferSize = LZ4IO_dBufferSize; + ress.dstBuffer = malloc(ress.dstBufferSize); + if (!ress.srcBuffer || !ress.dstBuffer) EXM_THROW(61, "Allocation error : not enough memory"); + + LZ4IO_loadDDict(&ress, prefs); + + ress.dstFile = NULL; + return ress; +} + +static void LZ4IO_freeDResources(dRess_t ress) +{ + LZ4F_errorCode_t errorCode = LZ4F_freeDecompressionContext(ress.dCtx); + if (LZ4F_isError(errorCode)) EXM_THROW(69, "Error : can't free LZ4F context resource : %s", 
LZ4F_getErrorName(errorCode)); + free(ress.srcBuffer); + free(ress.dstBuffer); + free(ress.dictBuffer); +} + + +static unsigned long long +LZ4IO_decompressLZ4F(dRess_t ress, + FILE* const srcFile, FILE* const dstFile, + const LZ4IO_prefs_t* const prefs) +{ + unsigned long long filesize = 0; + LZ4F_errorCode_t nextToLoad; + unsigned storedSkips = 0; + + /* Init feed with magic number (already consumed from FILE* sFile) */ + { size_t inSize = MAGICNUMBER_SIZE; + size_t outSize= 0; + LZ4IO_writeLE32(ress.srcBuffer, LZ4IO_MAGICNUMBER); + nextToLoad = LZ4F_decompress_usingDict(ress.dCtx, ress.dstBuffer, &outSize, ress.srcBuffer, &inSize, ress.dictBuffer, ress.dictBufferSize, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(62, "Header error : %s", LZ4F_getErrorName(nextToLoad)); + } + + /* Main Loop */ + for (;nextToLoad;) { + size_t readSize; + size_t pos = 0; + size_t decodedBytes = ress.dstBufferSize; + + /* Read input */ + if (nextToLoad > ress.srcBufferSize) nextToLoad = ress.srcBufferSize; + readSize = fread(ress.srcBuffer, 1, nextToLoad, srcFile); + if (!readSize) break; /* reached end of file or stream */ + + while ((pos < readSize) || (decodedBytes == ress.dstBufferSize)) { /* still to read, or still to flush */ + /* Decode Input (at least partially) */ + size_t remaining = readSize - pos; + decodedBytes = ress.dstBufferSize; + nextToLoad = LZ4F_decompress_usingDict(ress.dCtx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, ress.dictBuffer, ress.dictBufferSize, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(66, "Decompression error : %s", LZ4F_getErrorName(nextToLoad)); + pos += remaining; + + /* Write Block */ + if (decodedBytes) { + if (!prefs->testMode) + storedSkips = LZ4IO_fwriteSparse(dstFile, ress.dstBuffer, decodedBytes, prefs->sparseFileSupport, storedSkips); + filesize += decodedBytes; + DISPLAYUPDATE(2, "\rDecompressed : %u MB ", (unsigned)(filesize>>20)); + } + + if (!nextToLoad) break; + } + } + /* can be out because 
readSize == 0, which could be an fread() error */ + if (ferror(srcFile)) EXM_THROW(67, "Read error"); + + if (!prefs->testMode) LZ4IO_fwriteSparseEnd(dstFile, storedSkips); + if (nextToLoad!=0) EXM_THROW(68, "Unfinished stream"); + + return filesize; +} + + +/* LZ4IO_passThrough: + * just output the same content as input, no decoding. + * This is a capability of zcat, and by extension lz4cat + * MNstore : contain the first MAGICNUMBER_SIZE bytes already read from finput + */ +#define PTSIZE (64 KB) +#define PTSIZET (PTSIZE / sizeof(size_t)) +static unsigned long long +LZ4IO_passThrough(FILE* finput, FILE* foutput, + unsigned char MNstore[MAGICNUMBER_SIZE], + int sparseFileSupport) +{ + size_t buffer[PTSIZET]; + size_t readBytes = 1; + unsigned long long total = MAGICNUMBER_SIZE; + unsigned storedSkips = 0; + + if (fwrite(MNstore, 1, MAGICNUMBER_SIZE, foutput) != MAGICNUMBER_SIZE) { + EXM_THROW(50, "Pass-through write error"); + } + while (readBytes) { + readBytes = fread(buffer, 1, sizeof(buffer), finput); + total += readBytes; + storedSkips = LZ4IO_fwriteSparse(foutput, buffer, readBytes, sparseFileSupport, storedSkips); + } + if (ferror(finput)) EXM_THROW(51, "Read Error"); + + LZ4IO_fwriteSparseEnd(foutput, storedSkips); + return total; +} + + +/** Safely handle cases when (unsigned)offset > LONG_MAX */ +static int fseek_u32(FILE *fp, unsigned offset, int where) +{ + const unsigned stepMax = 1U << 30; + int errorNb = 0; + + if (where != SEEK_CUR) return -1; /* Only allows SEEK_CUR */ + while (offset > 0) { + unsigned s = offset; + if (s > stepMax) s = stepMax; + errorNb = UTIL_fseek(fp, (long) s, SEEK_CUR); + if (errorNb != 0) break; + offset -= s; + } + return errorNb; +} + +#define ENDOFSTREAM ((unsigned long long)-1) +static unsigned long long +selectDecoder(dRess_t ress, + FILE* finput, FILE* foutput, + const LZ4IO_prefs_t* const prefs) +{ + unsigned char MNstore[MAGICNUMBER_SIZE]; + unsigned magicNumber; + static unsigned nbFrames = 0; + + /* init */ + 
nbFrames++; + + /* Check Archive Header */ + if (g_magicRead) { /* magic number already read from finput (see legacy frame)*/ + magicNumber = g_magicRead; + g_magicRead = 0; + } else { + size_t const nbReadBytes = fread(MNstore, 1, MAGICNUMBER_SIZE, finput); + if (nbReadBytes==0) { nbFrames = 0; return ENDOFSTREAM; } /* EOF */ + if (nbReadBytes != MAGICNUMBER_SIZE) + EXM_THROW(40, "Unrecognized header : Magic Number unreadable"); + magicNumber = LZ4IO_readLE32(MNstore); /* Little Endian format */ + } + if (LZ4IO_isSkippableMagicNumber(magicNumber)) + magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */ + + switch(magicNumber) + { + case LZ4IO_MAGICNUMBER: + return LZ4IO_decompressLZ4F(ress, finput, foutput, prefs); + case LEGACY_MAGICNUMBER: + DISPLAYLEVEL(4, "Detected : Legacy format \n"); + return LZ4IO_decodeLegacyStream(finput, foutput, prefs); + case LZ4IO_SKIPPABLE0: + DISPLAYLEVEL(4, "Skipping detected skippable area \n"); + { size_t const nbReadBytes = fread(MNstore, 1, 4, finput); + if (nbReadBytes != 4) + EXM_THROW(42, "Stream error : skippable size unreadable"); + } + { unsigned const size = LZ4IO_readLE32(MNstore); + int const errorNb = fseek_u32(finput, size, SEEK_CUR); + if (errorNb != 0) + EXM_THROW(43, "Stream error : cannot skip skippable area"); + } + return 0; + EXTENDED_FORMAT; /* macro extension for custom formats */ + default: + if (nbFrames == 1) { /* just started */ + /* Wrong magic number at the beginning of 1st stream */ + if (!prefs->testMode && prefs->overwrite && prefs->passThrough) { + nbFrames = 0; + return LZ4IO_passThrough(finput, foutput, MNstore, prefs->sparseFileSupport); + } + EXM_THROW(44,"Unrecognized header : file cannot be decoded"); + } + { long int const position = ftell(finput); /* only works for files < 2 GB */ + DISPLAYLEVEL(2, "Stream followed by undecodable data "); + if (position != -1L) + DISPLAYLEVEL(2, "at position %i ", (int)position); + DISPLAYLEVEL(2, "\n"); + } + return ENDOFSTREAM; + } +} + + 
+static int +LZ4IO_decompressSrcFile(dRess_t ress, + const char* input_filename, const char* output_filename, + const LZ4IO_prefs_t* const prefs) +{ + FILE* const foutput = ress.dstFile; + unsigned long long filesize = 0; + + /* Init */ + FILE* const finput = LZ4IO_openSrcFile(input_filename); + if (finput==NULL) return 1; + assert(foutput != NULL); + + /* Loop over multiple streams */ + for ( ; ; ) { /* endless loop, see break condition */ + unsigned long long const decodedSize = + selectDecoder(ress, finput, foutput, prefs); + if (decodedSize == ENDOFSTREAM) break; + filesize += decodedSize; + } + + /* Close input */ + fclose(finput); + if (prefs->removeSrcFile) { /* --rm */ + if (remove(input_filename)) + EXM_THROW(45, "Remove error : %s: %s", input_filename, strerror(errno)); + } + + /* Final Status */ + DISPLAYLEVEL(2, "\r%79s\r", ""); + DISPLAYLEVEL(2, "%-20.20s : decoded %llu bytes \n", input_filename, filesize); + (void)output_filename; + + return 0; +} + + +static int +LZ4IO_decompressDstFile(dRess_t ress, + const char* input_filename, const char* output_filename, + const LZ4IO_prefs_t* const prefs) +{ + stat_t statbuf; + int stat_result = 0; + FILE* const foutput = LZ4IO_openDstFile(output_filename, prefs); + if (foutput==NULL) return 1; /* failure */ + + if ( strcmp(input_filename, stdinmark) + && UTIL_getFileStat(input_filename, &statbuf)) + stat_result = 1; + + ress.dstFile = foutput; + LZ4IO_decompressSrcFile(ress, input_filename, output_filename, prefs); + + fclose(foutput); + + /* Copy owner, file permissions and modification time */ + if ( stat_result != 0 + && strcmp (output_filename, stdoutmark) + && strcmp (output_filename, nulmark)) { + UTIL_setFileStat(output_filename, &statbuf); + /* should return value be read ? or is silent fail good enough ? 
*/ + } + + return 0; +} + + +int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename, const LZ4IO_prefs_t* prefs) +{ + dRess_t const ress = LZ4IO_createDResources(prefs); + clock_t const start = clock(); + + int const missingFiles = LZ4IO_decompressDstFile(ress, input_filename, output_filename, prefs); + + clock_t const end = clock(); + double const seconds = (double)(end - start) / CLOCKS_PER_SEC; + DISPLAYLEVEL(4, "Done in %.2f sec \n", seconds); + + LZ4IO_freeDResources(ress); + return missingFiles; +} + + +int LZ4IO_decompressMultipleFilenames( + const char** inFileNamesTable, int ifntSize, + const char* suffix, + const LZ4IO_prefs_t* prefs) +{ + int i; + int skippedFiles = 0; + int missingFiles = 0; + char* outFileName = (char*)malloc(FNSPACE); + size_t ofnSize = FNSPACE; + size_t const suffixSize = strlen(suffix); + dRess_t ress = LZ4IO_createDResources(prefs); + + if (outFileName==NULL) EXM_THROW(70, "Memory allocation error"); + ress.dstFile = LZ4IO_openDstFile(stdoutmark, prefs); + + for (i=0; i = 4); assert(sizeID <= 7); + buffer[1] = (char)(sizeID + '0'); + buffer[2] = (blockMode == LZ4F_blockIndependent) ? 'I' : 'D'; + buffer[3] = 0; + return buffer; +} + +/* buffer : must be valid memory area of at least 10 bytes */ +static const char* LZ4IO_toHuman(long double size, char *buf) +{ + const char units[] = {"\0KMGTPEZY"}; + size_t i = 0; + for (; size >= 1024; i++) size /= 1024; + sprintf(buf, "%.2Lf%c", size, units[i]); + return buf; +} + +/* Get filename without path prefix */ +static const char* LZ4IO_baseName(const char* input_filename) +{ + const char* b = strrchr(input_filename, '/'); + if (!b) b = strrchr(input_filename, '\\'); + if (!b) return input_filename; + return b + 1; +} + +/* Report frame/s information (--list) in verbose mode (-v). + * Will populate file info with fileName and frameSummary where applicable. + * - TODO : + * + report nb of blocks, hence max. 
possible decompressed size (when not reported in header) + */ +static LZ4IO_infoResult +LZ4IO_getCompressedFileInfo(LZ4IO_cFileInfo_t* cfinfo, const char* input_filename) +{ + LZ4IO_infoResult result = LZ4IO_format_not_known; /* default result (error) */ + unsigned char buffer[LZ4F_HEADER_SIZE_MAX]; + FILE* const finput = LZ4IO_openSrcFile(input_filename); + + if (finput == NULL) return LZ4IO_not_a_file; + cfinfo->fileSize = UTIL_getOpenFileSize(finput); + + while (!feof(finput)) { + LZ4IO_frameInfo_t frameInfo = LZ4IO_INIT_FRAMEINFO; + unsigned magicNumber; + /* Get MagicNumber */ + { size_t const nbReadBytes = fread(buffer, 1, MAGICNUMBER_SIZE, finput); + if (nbReadBytes == 0) { break; } /* EOF */ + result = LZ4IO_format_not_known; /* default result (error) */ + if (nbReadBytes != MAGICNUMBER_SIZE) { + EXM_THROW(40, "Unrecognized header : Magic Number unreadable"); + } } + magicNumber = LZ4IO_readLE32(buffer); /* Little Endian format */ + if (LZ4IO_isSkippableMagicNumber(magicNumber)) + magicNumber = LZ4IO_SKIPPABLE0; /* fold skippable magic numbers */ + + switch (magicNumber) { + case LZ4IO_MAGICNUMBER: + if (cfinfo->frameSummary.frameType != lz4Frame) cfinfo->eqFrameTypes = 0; + /* Get frame info */ + { const size_t readBytes = fread(buffer + MAGICNUMBER_SIZE, 1, LZ4F_HEADER_SIZE_MIN - MAGICNUMBER_SIZE, finput); + if (!readBytes || ferror(finput)) EXM_THROW(71, "Error reading %s", input_filename); + } + { size_t hSize = LZ4F_headerSize(&buffer, LZ4F_HEADER_SIZE_MIN); + if (LZ4F_isError(hSize)) break; + if (hSize > (LZ4F_HEADER_SIZE_MIN + MAGICNUMBER_SIZE)) { + /* We've already read LZ4F_HEADER_SIZE_MIN so read any extra until hSize*/ + const size_t readBytes = fread(buffer + LZ4F_HEADER_SIZE_MIN, 1, hSize - LZ4F_HEADER_SIZE_MIN, finput); + if (!readBytes || ferror(finput)) EXM_THROW(72, "Error reading %s", input_filename); + } + /* Create decompression context */ + { LZ4F_dctx* dctx; + if ( LZ4F_isError(LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION)) ) 
break; + { unsigned const frameInfoError = LZ4F_isError(LZ4F_getFrameInfo(dctx, &frameInfo.lz4FrameInfo, buffer, &hSize)); + LZ4F_freeDecompressionContext(dctx); + if (frameInfoError) break; + if ((cfinfo->frameSummary.lz4FrameInfo.blockSizeID != frameInfo.lz4FrameInfo.blockSizeID || + cfinfo->frameSummary.lz4FrameInfo.blockMode != frameInfo.lz4FrameInfo.blockMode) + && cfinfo->frameCount != 0) + cfinfo->eqBlockTypes = 0; + { const unsigned long long totalBlocksSize = LZ4IO_skipBlocksData(finput, + frameInfo.lz4FrameInfo.blockChecksumFlag, + frameInfo.lz4FrameInfo.contentChecksumFlag); + if (totalBlocksSize) { + char bTypeBuffer[5]; + LZ4IO_blockTypeID(frameInfo.lz4FrameInfo.blockSizeID, frameInfo.lz4FrameInfo.blockMode, bTypeBuffer); + DISPLAYLEVEL(3, " %6llu %14s %5s %8s", + cfinfo->frameCount + 1, + LZ4IO_frameTypeNames[frameInfo.frameType], + bTypeBuffer, + frameInfo.lz4FrameInfo.contentChecksumFlag ? "XXH32" : "-"); + if (frameInfo.lz4FrameInfo.contentSize) { + { double const ratio = (double)(totalBlocksSize + hSize) / frameInfo.lz4FrameInfo.contentSize * 100; + DISPLAYLEVEL(3, " %20llu %20llu %9.2f%%\n", + totalBlocksSize + hSize, + frameInfo.lz4FrameInfo.contentSize, + ratio); + } + /* Now we've consumed frameInfo we can use it to store the total contentSize */ + frameInfo.lz4FrameInfo.contentSize += cfinfo->frameSummary.lz4FrameInfo.contentSize; + } + else { + DISPLAYLEVEL(3, " %20llu %20s %9s \n", totalBlocksSize + hSize, "-", "-"); + cfinfo->allContentSize = 0; + } + result = LZ4IO_LZ4F_OK; + } } } } } + break; + case LEGACY_MAGICNUMBER: + frameInfo.frameType = legacyFrame; + if (cfinfo->frameSummary.frameType != legacyFrame && cfinfo->frameCount != 0) cfinfo->eqFrameTypes = 0; + cfinfo->eqBlockTypes = 0; + cfinfo->allContentSize = 0; + { const unsigned long long totalBlocksSize = LZ4IO_skipLegacyBlocksData(finput); + if (totalBlocksSize) { + DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20llu %20s %9s\n", + cfinfo->frameCount + 1, + 
LZ4IO_frameTypeNames[frameInfo.frameType], + "-", "-", + totalBlocksSize + 4, + "-", "-"); + result = LZ4IO_LZ4F_OK; + } } + break; + case LZ4IO_SKIPPABLE0: + frameInfo.frameType = skippableFrame; + if (cfinfo->frameSummary.frameType != skippableFrame && cfinfo->frameCount != 0) cfinfo->eqFrameTypes = 0; + cfinfo->eqBlockTypes = 0; + cfinfo->allContentSize = 0; + { size_t const nbReadBytes = fread(buffer, 1, 4, finput); + if (nbReadBytes != 4) + EXM_THROW(42, "Stream error : skippable size unreadable"); + } + { unsigned const size = LZ4IO_readLE32(buffer); + int const errorNb = fseek_u32(finput, size, SEEK_CUR); + if (errorNb != 0) + EXM_THROW(43, "Stream error : cannot skip skippable area"); + DISPLAYLEVEL(3, " %6llu %14s %5s %8s %20u %20s %9s\n", + cfinfo->frameCount + 1, + "SkippableFrame", + "-", "-", size + 8, "-", "-"); + + result = LZ4IO_LZ4F_OK; + } + break; + default: + { long int const position = ftell(finput); /* only works for files < 2 GB */ + DISPLAYLEVEL(3, "Stream followed by undecodable data "); + if (position != -1L) + DISPLAYLEVEL(3, "at position %i ", (int)position); + DISPLAYLEVEL(3, "\n"); + } + break; + } + if (result != LZ4IO_LZ4F_OK) break; + cfinfo->frameSummary = frameInfo; + cfinfo->frameCount++; + } /* while (!feof(finput)) */ + fclose(finput); + return result; +} + + +int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx) +{ + int result = 0; + size_t idx = 0; + if (g_displayLevel < 3) { + DISPLAYOUT("%10s %14s %5s %11s %13s %9s %s\n", + "Frames", "Type", "Block", "Compressed", "Uncompressed", "Ratio", "Filename"); + } + for (; idx < ifnIdx; idx++) { + /* Get file info */ + LZ4IO_cFileInfo_t cfinfo = LZ4IO_INIT_CFILEINFO; + cfinfo.fileName = LZ4IO_baseName(inFileNames[idx]); + if (!UTIL_isRegFile(inFileNames[idx])) { + DISPLAYLEVEL(1, "lz4: %s is not a regular file \n", inFileNames[idx]); + return 0; + } + DISPLAYLEVEL(3, "%s(%llu/%llu)\n", cfinfo.fileName, (unsigned long long)idx + 1, (unsigned long 
long)ifnIdx); + DISPLAYLEVEL(3, " %6s %14s %5s %8s %20s %20s %9s\n", + "Frame", "Type", "Block", "Checksum", "Compressed", "Uncompressed", "Ratio") + { LZ4IO_infoResult const op_result = LZ4IO_getCompressedFileInfo(&cfinfo, inFileNames[idx]); + if (op_result != LZ4IO_LZ4F_OK) { + assert(op_result == LZ4IO_format_not_known); + DISPLAYLEVEL(1, "lz4: %s: File format not recognized \n", inFileNames[idx]); + return 0; + } } + DISPLAYLEVEL(3, "\n"); + if (g_displayLevel < 3) { + /* Display Summary */ + { char buffers[3][10]; + DISPLAYOUT("%10llu %14s %5s %11s %13s ", + cfinfo.frameCount, + cfinfo.eqFrameTypes ? LZ4IO_frameTypeNames[cfinfo.frameSummary.frameType] : "-" , + cfinfo.eqBlockTypes ? LZ4IO_blockTypeID(cfinfo.frameSummary.lz4FrameInfo.blockSizeID, + cfinfo.frameSummary.lz4FrameInfo.blockMode, buffers[0]) : "-", + LZ4IO_toHuman((long double)cfinfo.fileSize, buffers[1]), + cfinfo.allContentSize ? LZ4IO_toHuman((long double)cfinfo.frameSummary.lz4FrameInfo.contentSize, buffers[2]) : "-"); + if (cfinfo.allContentSize) { + double const ratio = (double)cfinfo.fileSize / cfinfo.frameSummary.lz4FrameInfo.contentSize * 100; + DISPLAYOUT("%9.2f%% %s \n", ratio, cfinfo.fileName); + } else { + DISPLAYOUT("%9s %s\n", + "-", + cfinfo.fileName); + } } } /* if (g_displayLevel < 3) */ + } /* for (; idx < ifnIdx; idx++) */ + + return result; +} diff --git a/programs/lz4io.h b/programs/lz4io.h new file mode 100644 index 0000000..d6d7eee --- /dev/null +++ b/programs/lz4io.h @@ -0,0 +1,134 @@ +/* + LZ4io.h - LZ4 File/Stream Interface + Copyright (C) Yann Collet 2011-2016 + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ +/* + Note : this is stand-alone program. + It is not part of LZ4 compression library, it is a user code of the LZ4 library. + - The license of LZ4 library is BSD. + - The license of xxHash library is BSD. + - The license of this source file is GPLv2. +*/ + +#ifndef LZ4IO_H_237902873 +#define LZ4IO_H_237902873 + +/*--- Dependency ---*/ +#include /* size_t */ + + +/* ************************************************** */ +/* Special input/output values */ +/* ************************************************** */ +#define stdinmark "stdin" +#define stdoutmark "stdout" +#define NULL_OUTPUT "null" +#ifdef _WIN32 +#define nulmark "nul" +#else +#define nulmark "/dev/null" +#endif + +/* ************************************************** */ +/* ****************** Type Definitions ************** */ +/* ************************************************** */ + +typedef struct LZ4IO_prefs_s LZ4IO_prefs_t; + +LZ4IO_prefs_t* LZ4IO_defaultPreferences(void); +void LZ4IO_freePreferences(LZ4IO_prefs_t* prefs); + +/* Size in bytes of a legacy block header in little-endian format */ +#define LZIO_LEGACY_BLOCK_HEADER_SIZE 4 + +/* ************************************************** */ +/* ****************** Functions ********************* */ +/* ************************************************** */ + +/* if output_filename == stdoutmark, writes to stdout */ +int 
LZ4IO_compressFilename(const char* input_filename, const char* output_filename, int compressionlevel, const LZ4IO_prefs_t* prefs); +int LZ4IO_decompressFilename(const char* input_filename, const char* output_filename, const LZ4IO_prefs_t* prefs); + +/* if suffix == stdoutmark, writes to stdout */ +int LZ4IO_compressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, int compressionlevel, const LZ4IO_prefs_t* prefs); +int LZ4IO_decompressMultipleFilenames(const char** inFileNamesTable, int ifntSize, const char* suffix, const LZ4IO_prefs_t* prefs); + + +/* ************************************************** */ +/* ****************** Parameters ******************** */ +/* ************************************************** */ + +int LZ4IO_setDictionaryFilename(LZ4IO_prefs_t* const prefs, const char* dictionaryFilename); + +/* Default setting : passThrough = 0; + return : passThrough mode (0/1) */ +int LZ4IO_setPassThrough(LZ4IO_prefs_t* const prefs, int yes); + +/* Default setting : overwrite = 1; + return : overwrite mode (0/1) */ +int LZ4IO_setOverwrite(LZ4IO_prefs_t* const prefs, int yes); + +/* Default setting : testMode = 0; + return : testMode (0/1) */ +int LZ4IO_setTestMode(LZ4IO_prefs_t* const prefs, int yes); + +/* blockSizeID : valid values : 4-5-6-7 + return : 0 if error, blockSize if OK */ +size_t LZ4IO_setBlockSizeID(LZ4IO_prefs_t* const prefs, unsigned blockSizeID); + +/* blockSize : valid values : 32 -> 4MB + return : 0 if error, actual blocksize if OK */ +size_t LZ4IO_setBlockSize(LZ4IO_prefs_t* const prefs, size_t blockSize); + +/* Default setting : independent blocks */ +typedef enum { LZ4IO_blockLinked=0, LZ4IO_blockIndependent} LZ4IO_blockMode_t; +int LZ4IO_setBlockMode(LZ4IO_prefs_t* const prefs, LZ4IO_blockMode_t blockMode); + +/* Default setting : no block checksum */ +int LZ4IO_setBlockChecksumMode(LZ4IO_prefs_t* const prefs, int xxhash); + +/* Default setting : stream checksum enabled */ +int 
LZ4IO_setStreamChecksumMode(LZ4IO_prefs_t* const prefs, int xxhash); + +/* Default setting : 0 (no notification) */ +int LZ4IO_setNotificationLevel(int level); + +/* Default setting : 0 (disabled) */ +int LZ4IO_setSparseFile(LZ4IO_prefs_t* const prefs, int enable); + +/* Default setting : 0 == no content size present in frame header */ +int LZ4IO_setContentSize(LZ4IO_prefs_t* const prefs, int enable); + +/* Default setting : 0 == src file preserved */ +void LZ4IO_setRemoveSrcFile(LZ4IO_prefs_t* const prefs, unsigned flag); + +/* Default setting : 0 == favor compression ratio + * Note : 1 only works for high compression levels (10+) */ +void LZ4IO_favorDecSpeed(LZ4IO_prefs_t* const prefs, int favor); + + +/* implement --list + * @return 0 on success, 1 on error */ +int LZ4IO_displayCompressedFilesInfo(const char** inFileNames, size_t ifnIdx); + + +#endif /* LZ4IO_H_237902873 */ diff --git a/programs/platform.h b/programs/platform.h new file mode 100644 index 0000000..ab8300d --- /dev/null +++ b/programs/platform.h @@ -0,0 +1,155 @@ +/* + platform.h - compiler and OS detection + Copyright (C) 2016-present, Przemyslaw Skibinski, Yann Collet + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+*/ + +#ifndef PLATFORM_H_MODULE +#define PLATFORM_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/* ************************************** +* Compiler Options +****************************************/ +#if defined(_MSC_VER) +# define _CRT_SECURE_NO_WARNINGS /* Disable Visual Studio warning messages for fopen, strncpy, strerror */ +# if (_MSC_VER <= 1800) /* (1800 = Visual Studio 2013) */ +# define _CRT_SECURE_NO_DEPRECATE /* VS2005 - must be declared before and */ +# define snprintf sprintf_s /* snprintf unsupported by Visual <= 2013 */ +# endif +#endif + + +/* ************************************** +* Detect 64-bit OS +* http://nadeausoftware.com/articles/2012/02/c_c_tip_how_detect_processor_type_using_compiler_predefined_macros +****************************************/ +#if defined __ia64 || defined _M_IA64 /* Intel Itanium */ \ + || defined __powerpc64__ || defined __ppc64__ || defined __PPC64__ /* POWER 64-bit */ \ + || (defined __sparc && (defined __sparcv9 || defined __sparc_v9__ || defined __arch64__)) || defined __sparc64__ /* SPARC 64-bit */ \ + || defined __x86_64__s || defined _M_X64 /* x86 64-bit */ \ + || defined __arm64__ || defined __aarch64__ || defined __ARM64_ARCH_8__ /* ARM 64-bit */ \ + || (defined __mips && (__mips == 64 || __mips == 4 || __mips == 3)) /* MIPS 64-bit */ \ + || defined _LP64 || defined __LP64__ /* NetBSD, OpenBSD */ || defined __64BIT__ /* AIX */ || defined _ADDR64 /* Cray */ \ + || (defined __SIZEOF_POINTER__ && __SIZEOF_POINTER__ == 8) /* gcc */ +# if !defined(__64BIT__) +# define __64BIT__ 1 +# endif +#endif + + +/* ********************************************************* +* Turn on Large Files support (>4GB) for 32-bit Linux/Unix +***********************************************************/ +#if !defined(__64BIT__) || defined(__MINGW32__) /* No point defining Large file for 64 bit but MinGW-w64 requires it */ +# if !defined(_FILE_OFFSET_BITS) +# define _FILE_OFFSET_BITS 64 /* turn off_t into a 64-bit 
type for ftello, fseeko */ +# endif +# if !defined(_LARGEFILE_SOURCE) /* obsolete macro, replaced with _FILE_OFFSET_BITS */ +# define _LARGEFILE_SOURCE 1 /* Large File Support extension (LFS) - fseeko, ftello */ +# endif +# if defined(_AIX) || defined(__hpux) +# define _LARGE_FILES /* Large file support on 32-bits AIX and HP-UX */ +# endif +#endif + + +/* ************************************************************ +* Detect POSIX version +* PLATFORM_POSIX_VERSION = -1 for non-Unix e.g. Windows +* PLATFORM_POSIX_VERSION = 0 for Unix-like non-POSIX +* PLATFORM_POSIX_VERSION >= 1 is equal to found _POSIX_VERSION +************************************************************** */ +#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \ + || defined(__midipix__) || defined(__VMS)) +# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1–2001 (SUSv3) conformant */ \ + || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MidnightBSD__) /* BSD distros */ \ + || defined(__HAIKU__) +# define PLATFORM_POSIX_VERSION 200112L /* assumed for these systems, without reading _POSIX_VERSION */ +# else +# if defined(__linux__) || defined(__linux) +# ifndef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200809L /* use feature test macro */ +# endif +# endif +# include <unistd.h> /* declares _POSIX_VERSION */ +# if defined(_POSIX_VERSION) /* POSIX compliant */ +# define PLATFORM_POSIX_VERSION _POSIX_VERSION +# else +# define PLATFORM_POSIX_VERSION 0 +# endif +# endif +#endif +#if !defined(PLATFORM_POSIX_VERSION) +# define PLATFORM_POSIX_VERSION -1 +#endif + + +/*-********************************************* +* Detect if isatty() and fileno() are available +*********************************************** */ +#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) || (PLATFORM_POSIX_VERSION >= 200112L) || defined(__DJGPP__) +# include <unistd.h> /* isatty */ +# define IS_CONSOLE(stdStream)
isatty(fileno(stdStream)) +#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__) +# include <io.h> /* _isatty */ +# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream)) +#elif defined(WIN32) || defined(_WIN32) +# include <io.h> /* _isatty */ +# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include <stdio.h> /* FILE */ +static __inline int IS_CONSOLE(FILE* stdStream) +{ + DWORD dummy; /* receives the console mode; the value itself is not used */ + return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy); +} +#else +# define IS_CONSOLE(stdStream) 0 +#endif + + +/****************************** +* OS-specific Includes +***************************** */ +#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) +# include <fcntl.h> /* _O_BINARY */ +# include <io.h> /* _setmode, _fileno, _get_osfhandle */ +# if !defined(__DJGPP__) +# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */ +# include <winioctl.h> /* FSCTL_SET_SPARSE */ +# define SET_BINARY_MODE(file) { int unused=_setmode(_fileno(file), _O_BINARY); (void)unused; } +# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); } +# else +# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) +# define SET_SPARSE_FILE_MODE(file) +# endif +#else +# define SET_BINARY_MODE(file) +# define SET_SPARSE_FILE_MODE(file) +#endif + + + +#if defined (__cplusplus) +} +#endif + +#endif /* PLATFORM_H_MODULE */ diff --git a/programs/util.h b/programs/util.h new file mode 100644 index 0000000..733c1ca --- /dev/null +++ b/programs/util.h @@ -0,0 +1,650 @@ +/* + util.h - utility functions + Copyright (C) 2016-present, Przemyslaw Skibinski, Yann Collet + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#ifndef UTIL_H_MODULE +#define UTIL_H_MODULE + +#if defined (__cplusplus) +extern "C" { +#endif + + + +/*-**************************************** +* Dependencies +******************************************/ +#include "platform.h" /* PLATFORM_POSIX_VERSION */ +#include <stddef.h> /* size_t, ptrdiff_t */ +#include <stdlib.h> /* malloc */ +#include <string.h> /* strlen, strncpy */ +#include <stdio.h> /* fprintf, fileno */ +#include <assert.h> +#include <sys/types.h> /* stat, utime */ +#include <sys/stat.h> /* stat */ +#if defined(_WIN32) +# include <sys/utime.h> /* utime */ +# include <io.h> /* _chmod */ +#else +# include <unistd.h> /* chown, stat */ +# if PLATFORM_POSIX_VERSION < 200809L +# include <utime.h> /* utime */ +# else +# include <fcntl.h> /* AT_FDCWD */ +# include <sys/stat.h> /* for utimensat */ +# endif +#endif +#include <time.h> /* time */ +#include <limits.h> /* INT_MAX */ +#include <errno.h> + + + +/*-************************************************************** +* Basic Types +*****************************************************************/ +#if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) ) +# include <stdint.h> + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef int16_t S16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; + typedef int64_t S64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef signed short S16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; + typedef signed long long S64; +#endif + + +/* ************************************************************ +* Avoid fseek()'s 2GiB barrier with MSVC,
MacOS, *BSD, MinGW +***************************************************************/ +#if defined(_MSC_VER) && (_MSC_VER >= 1400) +# define UTIL_fseek _fseeki64 +#elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */ +# define UTIL_fseek fseeko +#elif defined(__MINGW32__) && defined(__MSVCRT__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) +# define UTIL_fseek fseeko64 +#else +# define UTIL_fseek fseek +#endif + + +/*-**************************************** +* Sleep functions: Windows - Posix - others +******************************************/ +#if defined(_WIN32) +# include <windows.h> +# define SET_REALTIME_PRIORITY SetPriorityClass(GetCurrentProcess(), REALTIME_PRIORITY_CLASS) +# define UTIL_sleep(s) Sleep(1000*(s)) /* seconds -> milliseconds; argument parenthesized so expressions such as a+b scale correctly */ +# define UTIL_sleepMilli(milli) Sleep(milli) +#elif PLATFORM_POSIX_VERSION >= 0 /* Unix-like operating system */ +# include <unistd.h> +# include <sys/resource.h> /* setpriority */ +# include <time.h> /* clock_t, nanosleep, clock, CLOCKS_PER_SEC */ +# if defined(PRIO_PROCESS) +# define SET_REALTIME_PRIORITY setpriority(PRIO_PROCESS, 0, -20) +# else +# define SET_REALTIME_PRIORITY /* disabled */ +# endif +# define UTIL_sleep(s) sleep(s) +# if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 199309L)) || (PLATFORM_POSIX_VERSION >= 200112L) /* nanosleep requires POSIX.1-2001 */ +# define UTIL_sleepMilli(milli) { struct timespec t; t.tv_sec=0; t.tv_nsec=(milli)*1000000ULL; nanosleep(&t, NULL); } /* tv_sec stays 0, so milli is expected to be below 1000 */ +# else +# define UTIL_sleepMilli(milli) /* disabled */ +# endif +#else +# define SET_REALTIME_PRIORITY /* disabled */ +# define UTIL_sleep(s) /* disabled */ +# define UTIL_sleepMilli(milli) /* disabled */ +#endif + + +/*-**************************************** +* stat() functions +******************************************/ +#if defined(_MSC_VER) +# define UTIL_TYPE_stat __stat64 +# define UTIL_stat _stat64 +# define UTIL_fstat _fstat64 +# define UTIL_STAT_MODE_ISREG(st_mode) ((st_mode) & S_IFREG) +#elif defined(__MINGW32__) && defined
(__MSVCRT__) +# define UTIL_TYPE_stat _stati64 +# define UTIL_stat _stati64 +# define UTIL_fstat _fstati64 +# define UTIL_STAT_MODE_ISREG(st_mode) ((st_mode) & S_IFREG) +#else +# define UTIL_TYPE_stat stat +# define UTIL_stat stat +# define UTIL_fstat fstat +# define UTIL_STAT_MODE_ISREG(st_mode) (S_ISREG(st_mode)) +#endif + + +/*-**************************************** +* fileno() function +******************************************/ +#if defined(_MSC_VER) +# define UTIL_fileno _fileno +#else +# define UTIL_fileno fileno +#endif + +/* ************************************* +* Constants +***************************************/ +#define LIST_SIZE_INCREASE (8*1024) /* number of bytes added each time the file-list buffer is grown */ + + +/*-**************************************** +* Compiler specifics +******************************************/ +#if defined(__INTEL_COMPILER) +# pragma warning(disable : 177) /* disable: message #177: function was declared but never referenced, useful with UTIL_STATIC */ +#endif +#if defined(__GNUC__) +# define UTIL_STATIC static __attribute__((unused)) +#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +# define UTIL_STATIC static inline +#elif defined(_MSC_VER) +# define UTIL_STATIC static __inline +#else +# define UTIL_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */ +#endif + + +/*-**************************************** +* Time functions +******************************************/ +#if defined(_WIN32) /* Windows */ + + typedef LARGE_INTEGER UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { UTIL_time_t x; QueryPerformanceCounter(&x); return x; } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; /* set to 1 after the first call so the frequency is queried only once */ + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return
1000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static LARGE_INTEGER ticksPerSecond; + static int init = 0; /* set to 1 after the first call so the frequency is queried only once */ + if (!init) { + if (!QueryPerformanceFrequency(&ticksPerSecond)) + fprintf(stderr, "ERROR: QueryPerformanceFrequency() failure\n"); + init = 1; + } + return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart; + } + +#elif defined(__APPLE__) && defined(__MACH__) + + #include <mach/mach_time.h> + typedef U64 UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return mach_absolute_time(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; /* timebase is fetched on the first call only */ + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return (((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom)) / 1000ULL; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) + { + static mach_timebase_info_data_t rate; + static int init = 0; /* timebase is fetched on the first call only */ + if (!init) { + mach_timebase_info(&rate); + init = 1; + } + return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom); + } + +#elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || (defined(__GLIBC__) && ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2) ) ) + + #include <time.h> + typedef struct timespec UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) + { + UTIL_time_t now; + if (clock_gettime(CLOCK_MONOTONIC, &now)) + fprintf(stderr, "ERROR: Failed to get time\n"); /* we could also exit() */ + return now; + } + UTIL_STATIC UTIL_time_t UTIL_getSpanTime(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t diff; + if (end.tv_nsec < begin.tv_nsec) { /* borrow one second so the nanosecond difference stays non-negative */ + diff.tv_sec = (end.tv_sec - 1) - begin.tv_sec; + diff.tv_nsec = (end.tv_nsec + 1000000000ULL) - begin.tv_nsec; + } else { + diff.tv_sec = end.tv_sec - begin.tv_sec; + diff.tv_nsec = end.tv_nsec -
begin.tv_nsec; + } + return diff; + } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 micro = 0; + micro += 1000000ULL * diff.tv_sec; + micro += diff.tv_nsec / 1000ULL; + return micro; + } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t begin, UTIL_time_t end) + { + UTIL_time_t const diff = UTIL_getSpanTime(begin, end); + U64 nano = 0; + nano += 1000000000ULL * diff.tv_sec; + nano += diff.tv_nsec; + return nano; + } + +#else /* relies on standard C (note : clock_t measurements can be wrong when using multi-threading) */ + + typedef clock_t UTIL_time_t; + UTIL_STATIC UTIL_time_t UTIL_getTime(void) { return clock(); } + UTIL_STATIC U64 UTIL_getSpanTimeMicro(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } + UTIL_STATIC U64 UTIL_getSpanTimeNano(UTIL_time_t clockStart, UTIL_time_t clockEnd) { return 1000000000ULL * (clockEnd - clockStart) / CLOCKS_PER_SEC; } +#endif + + +/* returns the time elapsed between clockStart and the moment of the call, in microseconds */ +UTIL_STATIC U64 UTIL_clockSpanMicro(UTIL_time_t clockStart) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeMicro(clockStart, clockEnd); +} + +/* returns the time elapsed between clockStart and the moment of the call, in nanoseconds */ +UTIL_STATIC U64 UTIL_clockSpanNano(UTIL_time_t clockStart) +{ + UTIL_time_t const clockEnd = UTIL_getTime(); + return UTIL_getSpanTimeNano(clockStart, clockEnd); +} + +UTIL_STATIC void UTIL_waitForNextTick(void) +{ + UTIL_time_t const clockStart = UTIL_getTime(); + UTIL_time_t clockEnd; + do { + clockEnd = UTIL_getTime(); + } while (UTIL_getSpanTimeNano(clockStart, clockEnd) == 0); /* busy-wait until the timer reports a non-zero span */ +} + + + +/*-**************************************** +* File functions +******************************************/ +#if defined(_MSC_VER) + #define chmod _chmod + typedef struct __stat64 stat_t; +#else + typedef struct stat stat_t; +#endif + + +UTIL_STATIC int UTIL_isRegFile(const char* infilename); /* forward declaration: used by UTIL_setFileStat() before its definition */ + + +UTIL_STATIC
int UTIL_setFileStat(const char *filename, stat_t *statbuf) +{ + int res = 0; + + if (!UTIL_isRegFile(filename)) + return -1; + + { +#if defined(_WIN32) || (PLATFORM_POSIX_VERSION < 200809L) + struct utimbuf timebuf; + timebuf.actime = time(NULL); + timebuf.modtime = statbuf->st_mtime; + res += utime(filename, &timebuf); /* set access and modification times */ +#else + struct timespec timebuf[2] = {}; + timebuf[0].tv_nsec = UTIME_NOW; + timebuf[1].tv_sec = statbuf->st_mtime; + res += utimensat(AT_FDCWD, filename, timebuf, 0); /* set access and modification times */ +#endif + } + +#if !defined(_WIN32) + res += chown(filename, statbuf->st_uid, statbuf->st_gid); /* Copy ownership */ +#endif + + res += chmod(filename, statbuf->st_mode & 07777); /* Copy file permissions */ + + errno = 0; + return -res; /* number of errors is returned */ +} + + +UTIL_STATIC int UTIL_getFileStat(const char* infilename, stat_t *statbuf) +{ + int r; +#if defined(_MSC_VER) + r = _stat64(infilename, statbuf); + if (r || !(statbuf->st_mode & S_IFREG)) return 0; /* No good... */ +#else + r = stat(infilename, statbuf); + if (r || !S_ISREG(statbuf->st_mode)) return 0; /* No good... */ +#endif + return 1; +} + + +UTIL_STATIC int UTIL_isRegFile(const char* infilename) +{ + stat_t statbuf; + return UTIL_getFileStat(infilename, &statbuf); /* Only need to know whether it is a regular file */ +} + + +UTIL_STATIC U32 UTIL_isDirectory(const char* infilename) +{ + int r; + stat_t statbuf; +#if defined(_MSC_VER) + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +UTIL_STATIC U64 UTIL_getOpenFileSize(FILE* file) +{ + int r; + int fd; + struct UTIL_TYPE_stat statbuf; + + fd = UTIL_fileno(file); + if (fd < 0) { + perror("fileno"); + exit(1); + } + r = UTIL_fstat(fd, &statbuf); + if (r || !UTIL_STAT_MODE_ISREG(statbuf.st_mode)) return 0; /* No good... 
*/ + return (U64)statbuf.st_size; +} + + +UTIL_STATIC U64 UTIL_getFileSize(const char* infilename) +{ + int r; + struct UTIL_TYPE_stat statbuf; + + r = UTIL_stat(infilename, &statbuf); + if (r || !UTIL_STAT_MODE_ISREG(statbuf.st_mode)) return 0; /* No good... */ + return (U64)statbuf.st_size; +} + + +UTIL_STATIC U64 UTIL_getTotalFileSize(const char** fileNamesTable, unsigned nbFiles) +{ + U64 total = 0; + unsigned n; + for (n=0; n = *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); FindClose(hFile); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + } while (FindNextFileA(hFile, &cFile)); + + FindClose(hFile); + assert(nbFiles < INT_MAX); + return (int)nbFiles; +} + +#elif defined(__linux__) || (PLATFORM_POSIX_VERSION >= 200112L) /* opendir, readdir require POSIX.1-2001 */ +# define UTIL_HAS_CREATEFILELIST +# include /* opendir, readdir */ +# include /* strerror, memcpy */ + +UTIL_STATIC int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd) +{ + DIR* dir; + struct dirent * entry; + int dirLength, nbFiles = 0; + + if (!(dir = opendir(dirName))) { + fprintf(stderr, "Cannot open directory '%s': %s\n", dirName, strerror(errno)); + return 0; + } + + dirLength = (int)strlen(dirName); + errno = 0; + while ((entry = readdir(dir)) != NULL) { + char* path; + int fnameLength, pathLength; + if (strcmp (entry->d_name, "..") == 0 || + strcmp (entry->d_name, ".") == 0) continue; + fnameLength = (int)strlen(entry->d_name); + path = (char*) malloc(dirLength + fnameLength + 2); + if (!path) { closedir(dir); return 0; } + memcpy(path, dirName, dirLength); + path[dirLength] = '/'; + memcpy(path+dirLength+1, entry->d_name, fnameLength); + 
pathLength = dirLength+1+fnameLength; + path[pathLength] = 0; + + if (UTIL_isDirectory(path)) { + nbFiles += UTIL_prepareFileList(path, bufStart, pos, bufEnd); /* Recursively call "UTIL_prepareFileList" with the new path. */ + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } else { + if (*bufStart + *pos + pathLength >= *bufEnd) { + ptrdiff_t newListSize = (*bufEnd - *bufStart) + LIST_SIZE_INCREASE; + *bufStart = (char*)UTIL_realloc(*bufStart, newListSize); + *bufEnd = *bufStart + newListSize; + if (*bufStart == NULL) { free(path); closedir(dir); return 0; } + } + if (*bufStart + *pos + pathLength < *bufEnd) { + strncpy(*bufStart + *pos, path, *bufEnd - (*bufStart + *pos)); + *pos += pathLength + 1; + nbFiles++; + } + } + free(path); + errno = 0; /* clear errno after UTIL_isDirectory, UTIL_prepareFileList */ + } + + if (errno != 0) { + fprintf(stderr, "readdir(%s) error: %s\n", dirName, strerror(errno)); + free(*bufStart); + *bufStart = NULL; + } + closedir(dir); + return nbFiles; +} + +#else + +UTIL_STATIC int UTIL_prepareFileList(const char* dirName, char** bufStart, size_t* pos, char** bufEnd) +{ + (void)bufStart; (void)bufEnd; (void)pos; + fprintf(stderr, "Directory %s ignored (compiled without _WIN32 or _POSIX_C_SOURCE)\n", dirName); + return 0; +} + +#endif /* #ifdef _WIN32 */ + +/* + * UTIL_createFileList - takes a list of files and directories (params: inputNames, inputNamesNb), scans directories, + * and returns a new list of files (params: return value, allocatedBuffer, allocatedNamesNb). + * After finishing usage of the list the structures should be freed with UTIL_freeFileList(params: return value, allocatedBuffer) + * In case of error UTIL_createFileList returns NULL and UTIL_freeFileList should not be called. 
+ */ +UTIL_STATIC const char** +UTIL_createFileList(const char** inputNames, unsigned inputNamesNb, + char** allocatedBuffer, unsigned* allocatedNamesNb) +{ + size_t pos; + unsigned i, nbFiles; + char* buf = (char*)malloc(LIST_SIZE_INCREASE); + size_t bufSize = LIST_SIZE_INCREASE; + const char** fileTable; + + if (!buf) return NULL; + + for (i=0, pos=0, nbFiles=0; i = bufSize) { + while (pos + len >= bufSize) bufSize += LIST_SIZE_INCREASE; + buf = (char*)UTIL_realloc(buf, bufSize); + if (!buf) return NULL; + } + assert(pos + len < bufSize); + memcpy(buf + pos, inputNames[i], len); + pos += len; + nbFiles++; + } else { + char* bufend = buf + bufSize; + nbFiles += (unsigned)UTIL_prepareFileList(inputNames[i], &buf, &pos, &bufend); + if (buf == NULL) return NULL; + assert(bufend > buf); + bufSize = (size_t)(bufend - buf); + } } + + if (nbFiles == 0) { free(buf); return NULL; } + + fileTable = (const char**)malloc(((size_t)nbFiles+1) * sizeof(const char*)); + if (!fileTable) { free(buf); return NULL; } + + for (i=0, pos=0; i bufSize) { + free(buf); + free((void*)fileTable); + return NULL; + } /* can this happen ? 
*/ + + *allocatedBuffer = buf; + *allocatedNamesNb = nbFiles; + + return fileTable; +} + + +UTIL_STATIC void +UTIL_freeFileList(const char** filenameTable, char* allocatedBuffer) +{ + if (allocatedBuffer) free(allocatedBuffer); + if (filenameTable) free((void*)filenameTable); +} + + +#if defined (__cplusplus) +} +#endif + +#endif /* UTIL_H_MODULE */ diff --git a/tests/.gitignore b/tests/.gitignore new file mode 100644 index 0000000..99351af --- /dev/null +++ b/tests/.gitignore @@ -0,0 +1,22 @@ + +# build artefacts +datagen +frametest +frametest32 +fullbench +fullbench32 +fuzzer +fuzzer32 +fasttest +roundTripTest +checkTag +checkFrame +decompress-partial + +# test artefacts +tmp* +versionsTest +lz4_all.c + +# local tests +afl diff --git a/tests/COPYING b/tests/COPYING new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/tests/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License.
diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..6eee132 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,544 @@ +# ########################################################################## +# LZ4 programs - Makefile +# Copyright (C) Yann Collet 2011-present +# +# GPL v2 License +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +# +# You can contact the author at : +# - LZ4 homepage : http://www.lz4.org +# - LZ4 source repository : https://github.com/lz4/lz4 +# ########################################################################## +# fuzzer : Test tool, to check lz4 integrity on target platform +# frametest : Test tool, to check lz4frame integrity on target platform +# fullbench : Precisely measure speed for each LZ4 function variant +# datagen : generates synthetic data samples for tests & benchmarks +# ########################################################################## + +LZ4DIR := ../lib +PRGDIR := ../programs +TESTDIR := versionsTest +PYTHON ?= python3 + +DEBUGLEVEL?= 1 +DEBUGFLAGS = -g -DLZ4_DEBUG=$(DEBUGLEVEL) +CFLAGS ?= -O3 # can select custom optimization flags. 
Example : CFLAGS=-O2 make +CFLAGS += -Wall -Wextra -Wundef -Wcast-qual -Wcast-align -Wshadow \ + -Wswitch-enum -Wdeclaration-after-statement -Wstrict-prototypes \ + -Wpointer-arith -Wstrict-aliasing=1 +CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS) +CPPFLAGS+= -I$(LZ4DIR) -I$(PRGDIR) -DXXH_NAMESPACE=LZ4_ +FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) + +include ../Makefile.inc + +LZ4 := $(PRGDIR)/lz4$(EXT) + + +# Default test parameters +TEST_FILES := COPYING +FUZZER_TIME := -T90s +NB_LOOPS ?= -i1 + +.PHONY: default +default: all + +all: fullbench fuzzer frametest roundTripTest datagen checkFrame decompress-partial + +all32: CFLAGS+=-m32 +all32: all + +lz4: + $(MAKE) -C $(PRGDIR) $@ CFLAGS="$(CFLAGS)" + +lib liblz4.pc: + $(MAKE) -C $(LZ4DIR) $@ CFLAGS="$(CFLAGS)" + +lz4c unlz4 lz4cat: lz4 + $(LN_SF) $(LZ4) $(PRGDIR)/$@ + +lz4c32: # create a 32-bits version for 32/64 interop tests + $(MAKE) -C $(PRGDIR) $@ CFLAGS="-m32 $(CFLAGS)" + +%.o : $(LZ4DIR)/%.c $(LZ4DIR)/%.h + $(CC) -c $(CFLAGS) $(CPPFLAGS) $< -o $@ + +fullbench : DEBUGLEVEL=0 +fullbench : lz4.o lz4hc.o lz4frame.o xxhash.o fullbench.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +$(LZ4DIR)/liblz4.a: + $(MAKE) -C $(LZ4DIR) liblz4.a + +fullbench-lib: fullbench.c $(LZ4DIR)/liblz4.a + $(CC) $(FLAGS) $^ -o $@$(EXT) + +fullbench-dll: fullbench.c $(LZ4DIR)/xxhash.c + $(MAKE) -C $(LZ4DIR) liblz4 + $(CC) $(FLAGS) $^ -o $@$(EXT) -DLZ4_DLL_IMPORT=1 $(LZ4DIR)/dll/$(LIBLZ4).dll + +# test LZ4_USER_MEMORY_FUNCTIONS +fullbench-wmalloc: CPPFLAGS += -DLZ4_USER_MEMORY_FUNCTIONS +fullbench-wmalloc: fullbench + +fuzzer : lz4.o lz4hc.o xxhash.o fuzzer.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +frametest: lz4frame.o lz4.o lz4hc.o xxhash.o frametest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +roundTripTest : lz4.o lz4hc.o xxhash.o roundTripTest.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +datagen : $(PRGDIR)/datagen.c datagencli.c + $(CC) $(FLAGS) -I$(PRGDIR) $^ -o $@$(EXT) + +checkFrame : lz4frame.o lz4.o lz4hc.o xxhash.o checkFrame.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + 
+decompress-partial: lz4.o decompress-partial.c + $(CC) $(FLAGS) $^ -o $@$(EXT) + +.PHONY: clean +clean: # also cleans $(LZ4DIR) and $(PRGDIR); the list below must cover every binary built by `all` + @$(MAKE) -C $(LZ4DIR) $@ > $(VOID) + @$(MAKE) -C $(PRGDIR) $@ > $(VOID) + @$(RM) -rf core *.o *.test tmp* \ + fullbench-dll$(EXT) fullbench-lib$(EXT) \ + fullbench$(EXT) fullbench32$(EXT) \ + fuzzer$(EXT) fuzzer32$(EXT) \ + frametest$(EXT) frametest32$(EXT) \ + fasttest$(EXT) roundTripTest$(EXT) \ + datagen$(EXT) checkTag$(EXT) \ + checkFrame$(EXT) decompress-partial$(EXT) \ + lz4_all.c + @$(RM) -rf $(TESTDIR) + @echo Cleaning completed + +.PHONY: versionsTest +versionsTest: + $(PYTHON) test-lz4-versions.py + +.PHONY: listTest +listTest: lz4 + QEMU_SYS=$(QEMU_SYS) $(PYTHON) test-lz4-list.py + +checkTag: checkTag.c $(LZ4DIR)/lz4.h + $(CC) $(FLAGS) $< -o $@$(EXT) + +#----------------------------------------------------------------------------- +# validated only for Linux, OSX, BSD, Hurd and Solaris targets +#----------------------------------------------------------------------------- +ifeq ($(POSIX_ENV),Yes) + +MD5:=md5sum +ifneq (,$(filter $(shell uname), Darwin )) +MD5:=md5 -r +endif + +# note : we should probably settle on a single compare utility +CMP:=cmp +DIFF:=diff +ifneq (,$(filter $(shell uname),SunOS)) +DIFF:=gdiff +endif + +CAT:=cat +DD:=dd +DATAGEN:=./datagen + +.PHONY: list +list: + @$(MAKE) -pRrq -f $(lastword $(MAKEFILE_LIST)) : 2>/dev/null | awk -v RS= -F: '/^# File/,/^# Finished Make data base/ {if ($$1 !~ "^[#.]") {print $$1}}' | sort | grep -E -v -e '^[^[:alnum:]]' -e '^$@$$' | xargs + +.PHONY: check +check: test-lz4-essentials + +.PHONY: test +test: test-lz4 test-lz4c test-frametest test-fullbench test-fuzzer test-install test-amalgamation listTest test-decompress-partial + +.PHONY: test32 +test32: CFLAGS+=-m32 +test32: test + +test-amalgamation: lz4_all.o + +lz4_all.c: $(LZ4DIR)/lz4.c $(LZ4DIR)/lz4hc.c $(LZ4DIR)/lz4frame.c + $(CAT) $^ > $@ + +test-install: lz4 lib liblz4.pc + lz4_root=.. 
./test_install.sh + +test-lz4-sparse: lz4 datagen + @echo "\n ---- test sparse file support ----" + $(DATAGEN) -g5M -P100 > tmplsdg5M + $(LZ4) -B4D tmplsdg5M -c | $(LZ4) -dv --sparse > tmplscB4 + $(DIFF) -s tmplsdg5M tmplscB4 + $(LZ4) -B5D tmplsdg5M -c | $(LZ4) -dv --sparse > tmplscB5 + $(DIFF) -s tmplsdg5M tmplscB5 + $(LZ4) -B6D tmplsdg5M -c | $(LZ4) -dv --sparse > tmplscB6 + $(DIFF) -s tmplsdg5M tmplscB6 + $(LZ4) -B7D tmplsdg5M -c | $(LZ4) -dv --sparse > tmplscB7 + $(DIFF) -s tmplsdg5M tmplscB7 + $(LZ4) tmplsdg5M -c | $(LZ4) -dv --no-sparse > tmplsnosparse + $(DIFF) -s tmplsdg5M tmplsnosparse + ls -ls tmpls* + $(DATAGEN) -s1 -g1200007 -P100 | $(LZ4) | $(LZ4) -dv --sparse > tmplsodd # Odd size file (to generate non-full last block) + $(DATAGEN) -s1 -g1200007 -P100 | $(DIFF) -s - tmplsodd + ls -ls tmplsodd + @$(RM) tmpls* + @echo "\n Compatibility with Console :" + echo "Hello World 1 !" | $(LZ4) | $(LZ4) -d -c + echo "Hello World 2 !" | $(LZ4) | $(LZ4) -d | $(CAT) + echo "Hello World 3 !" | $(LZ4) --no-frame-crc | $(LZ4) -d -c + @echo "\n Compatibility with Append :" + $(DATAGEN) -P100 -g1M > tmplsdg1M + $(CAT) tmplsdg1M tmplsdg1M > tmpls2M + $(LZ4) -B5 -v tmplsdg1M tmplsc + $(LZ4) -d -v tmplsc tmplsr + $(LZ4) -d -v tmplsc -c >> tmplsr + ls -ls tmp* + $(DIFF) tmpls2M tmplsr + @$(RM) tmpls* + +test-lz4-contentSize: lz4 datagen + @echo "\n ---- test original size support ----" + $(DATAGEN) -g15M > tmplc1 + $(LZ4) -v tmplc1 -c | $(LZ4) -t + $(LZ4) -v --content-size tmplc1 -c | $(LZ4) -d > tmplc2 + $(DIFF) tmplc1 tmplc2 + $(LZ4) -f tmplc1 -c > tmplc1.lz4 + $(LZ4) --content-size tmplc1 -c > tmplc2.lz4 + ! 
$(DIFF) tmplc1.lz4 tmplc2.lz4 # must differ, due to content size + $(LZ4) --content-size < tmplc1 > tmplc3.lz4 + $(DIFF) tmplc2.lz4 tmplc3.lz4 # both must contain content size + $(CAT) tmplc1 | $(LZ4) > tmplc4.lz4 + $(DIFF) tmplc1.lz4 tmplc4.lz4 # both don't have content size + $(CAT) tmplc1 | $(LZ4) --content-size > tmplc5.lz4 # can't determine content size + $(DIFF) tmplc1.lz4 tmplc5.lz4 # both don't have content size + @$(RM) tmplc* + +test-lz4-frame-concatenation: lz4 datagen + @echo "\n ---- test frame concatenation ----" + @printf '' > tmp-lfc-empty # must be a truly empty file : 'echo -n' is not portable and may write "-n" + @echo hi > tmp-lfc-nonempty + $(CAT) tmp-lfc-nonempty tmp-lfc-empty tmp-lfc-nonempty > tmp-lfc-src + $(LZ4) -zq tmp-lfc-empty -c > tmp-lfc-empty.lz4 + $(LZ4) -zq tmp-lfc-nonempty -c > tmp-lfc-nonempty.lz4 + $(CAT) tmp-lfc-nonempty.lz4 tmp-lfc-empty.lz4 tmp-lfc-nonempty.lz4 > tmp-lfc-concat.lz4 + $(LZ4) -d tmp-lfc-concat.lz4 -c > tmp-lfc-result + $(CMP) tmp-lfc-src tmp-lfc-result + @$(RM) tmp-lfc-* + @echo frame concatenation test completed + +test-lz4-multiple: lz4 datagen + @echo "\n ---- test multiple files ----" + @$(DATAGEN) -s1 > tmp-tlm1 2> $(VOID) + @$(DATAGEN) -s2 -g100K > tmp-tlm2 2> $(VOID) + @$(DATAGEN) -s3 -g200K > tmp-tlm3 2> $(VOID) + # compress multiple files : one .lz4 per source file + $(LZ4) -f -m tmp-tlm* + test -f tmp-tlm1.lz4 + test -f tmp-tlm2.lz4 + test -f tmp-tlm3.lz4 + # decompress multiple files : one output file per .lz4 + mv tmp-tlm1 tmp-tlm1-orig + mv tmp-tlm2 tmp-tlm2-orig + mv tmp-tlm3 tmp-tlm3-orig + $(LZ4) -d -f -m tmp-tlm*.lz4 + $(CMP) tmp-tlm1 tmp-tlm1-orig # must be identical + $(CMP) tmp-tlm2 tmp-tlm2-orig + $(CMP) tmp-tlm3 tmp-tlm3-orig + # compress multiple files into stdout + $(CAT) tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 > tmp-tlm-concat1 + $(RM) *.lz4 + $(LZ4) -m tmp-tlm1 tmp-tlm2 tmp-tlm3 -c > tmp-tlm-concat2 + test ! 
-f tmp-tlm1.lz4 # must not create .lz4 artefact + $(CMP) tmp-tlm-concat1 tmp-tlm-concat2 # must be equivalent + # decompress multiple files into stdout + $(RM) tmp-tlm-concat1 tmp-tlm-concat2 + $(LZ4) -f -m tmp-tlm1 tmp-tlm2 tmp-tlm3 # generate .lz4 to decompress + $(CAT) tmp-tlm1 tmp-tlm2 tmp-tlm3 > tmp-tlm-concat1 # create concatenated reference + $(RM) tmp-tlm1 tmp-tlm2 tmp-tlm3 + $(LZ4) -d -m tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 -c > tmp-tlm-concat2 + test ! -f tmp-tlm1 # must not create file artefact + $(CMP) tmp-tlm-concat1 tmp-tlm-concat2 # must be equivalent + # compress multiple files, one of which is absent (must fail) + ! $(LZ4) -f -m tmp-tlm-concat1 notHere tmp-tlm-concat2 # must fail : notHere not present + @$(RM) tmp-tlm* + +test-lz4-multiple-legacy: lz4 datagen + @echo "\n ---- test multiple files (Legacy format) ----" + @$(DATAGEN) -s1 > tmp-tlm1 2> $(VOID) + @$(DATAGEN) -s2 -g100K > tmp-tlm2 2> $(VOID) + @$(DATAGEN) -s3 -g200K > tmp-tlm3 2> $(VOID) + # compress multiple files using legacy format: one .lz4 per source file + $(LZ4) -f -l -m tmp-tlm* + test -f tmp-tlm1.lz4 + test -f tmp-tlm2.lz4 + test -f tmp-tlm3.lz4 + # decompress multiple files compressed using legacy format: one output file per .lz4 + mv tmp-tlm1 tmp-tlm1-orig + mv tmp-tlm2 tmp-tlm2-orig + mv tmp-tlm3 tmp-tlm3-orig + $(LZ4) -d -f -m tmp-tlm*.lz4 + $(LZ4) -l -d -f -m tmp-tlm*.lz4 # -l mustn't impact -d option + $(CMP) tmp-tlm1 tmp-tlm1-orig # must be identical + $(CMP) tmp-tlm2 tmp-tlm2-orig + $(CMP) tmp-tlm3 tmp-tlm3-orig + # compress multiple files into stdout using legacy format + $(CAT) tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 > tmp-tlm-concat1 + $(RM) *.lz4 + $(LZ4) -l -m tmp-tlm1 tmp-tlm2 tmp-tlm3 -c > tmp-tlm-concat2 + test ! 
-f tmp-tlm1.lz4 # must not create .lz4 artefact + $(CMP) tmp-tlm-concat1 tmp-tlm-concat2 # must be equivalent + # # # decompress multiple files into stdout using legacy format + $(RM) tmp-tlm-concat1 tmp-tlm-concat2 + $(LZ4) -l -f -m tmp-tlm1 tmp-tlm2 tmp-tlm3 # generate .lz4 to decompress + $(CAT) tmp-tlm1 tmp-tlm2 tmp-tlm3 > tmp-tlm-concat1 # create concatenated reference + $(RM) tmp-tlm1 tmp-tlm2 tmp-tlm3 + $(LZ4) -d -m tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 -c > tmp-tlm-concat2 + $(LZ4) -d -l -m tmp-tlm1.lz4 tmp-tlm2.lz4 tmp-tlm3.lz4 -c > tmp-tlm-concat2 # -l mustn't impact option -d + test ! -f tmp-tlm1 # must not create file artefact + $(CMP) tmp-tlm-concat1 tmp-tlm-concat2 # must be equivalent + # # # compress multiple files, one of which is absent (must fail) + ! $(LZ4) -f -l -m tmp-tlm-concat1 notHere-legacy tmp-tlm-concat2 # must fail : notHere-legacy not present + @$(RM) tmp-tlm* + +test-lz4-basic: lz4 datagen unlz4 lz4cat + @echo "\n ---- test lz4 basic compression/decompression ----" + $(DATAGEN) -g0 | $(LZ4) -v | $(LZ4) -t + $(DATAGEN) -g16KB | $(LZ4) -9 | $(LZ4) -t + $(DATAGEN) -g20KB > tmp-tlb-dg20k + $(LZ4) < tmp-tlb-dg20k | $(LZ4) -d > tmp-tlb-dec + $(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec + $(LZ4) --no-frame-crc < tmp-tlb-dg20k | $(LZ4) -d > tmp-tlb-dec + $(DIFF) -q tmp-tlb-dg20k tmp-tlb-dec + $(DATAGEN) | $(LZ4) -BI | $(LZ4) -t + $(DATAGEN) -g6M -P99 | $(LZ4) -9BD | $(LZ4) -t + $(DATAGEN) -g17M | $(LZ4) -9v | $(LZ4) -qt + $(DATAGEN) -g33M | $(LZ4) --no-frame-crc | $(LZ4) -t + $(DATAGEN) -g256MB | $(LZ4) -vqB4D | $(LZ4) -t + @echo "hello world" > tmp-tlb-hw + $(LZ4) --rm -f tmp-tlb-hw tmp-tlb-hw.lz4 + test ! -f tmp-tlb-hw # must fail (--rm) + test -f tmp-tlb-hw.lz4 + $(PRGDIR)/lz4cat tmp-tlb-hw.lz4 # must display hello world + test -f tmp-tlb-hw.lz4 + $(PRGDIR)/unlz4 --rm tmp-tlb-hw.lz4 tmp-tlb-hw + test -f tmp-tlb-hw + test ! -f tmp-tlb-hw.lz4 # must fail (--rm) + test ! 
-f tmp-tlb-hw.lz4.lz4 # must fail (unlz4) + $(PRGDIR)/lz4cat tmp-tlb-hw # pass-through mode + test -f tmp-tlb-hw + test ! -f tmp-tlb-hw.lz4 # must fail (lz4cat) + $(LZ4) tmp-tlb-hw tmp-tlb-hw.lz4 # creates tmp-tlb-hw.lz4 + $(PRGDIR)/lz4cat < tmp-tlb-hw.lz4 > tmp-tlb3 # checks lz4cat works with stdin (#285) + $(DIFF) -q tmp-tlb-hw tmp-tlb3 + $(PRGDIR)/lz4cat < tmp-tlb-hw > tmp-tlb2 # checks lz4cat works in pass-through mode + $(DIFF) -q tmp-tlb-hw tmp-tlb2 + cp tmp-tlb-hw ./-d + $(LZ4) --rm -- -d -d.lz4 # compresses ./d into ./-d.lz4 + test -f ./-d.lz4 + test ! -f ./-d + mv ./-d.lz4 ./-z + $(LZ4) -d --rm -- -z tmp-tlb4 # uncompresses ./-z into tmp-tlb4 + test ! -f ./-z + $(DIFF) -q tmp-tlb-hw tmp-tlb4 + $(LZ4) -f tmp-tlb-hw + $(LZ4) --list tmp-tlb-hw.lz4 # test --list on valid single-frame file + $(CAT) tmp-tlb-hw >> tmp-tlb-hw.lz4 + $(LZ4) -f tmp-tlb-hw.lz4 # uncompress valid frame followed by invalid data + $(LZ4) -BX tmp-tlb-hw -c -q | $(LZ4) -tv # test block checksum + # $(DATAGEN) -g20KB generates the same file every single time + # cannot save output of $(DATAGEN) -g20KB as input file to lz4 because the following shell commands are run before $(DATAGEN) -g20KB + test "$(shell $(DATAGEN) -g20KB | $(LZ4) -c --fast | wc -c)" -lt "$(shell $(DATAGEN) -g20KB | $(LZ4) -c --fast=9 | wc -c)" # -1 vs -9 + test "$(shell $(DATAGEN) -g20KB | $(LZ4) -c -1 | wc -c)" -lt "$(shell $(DATAGEN) -g20KB| $(LZ4) -c --fast=1 | wc -c)" # 1 vs -1 + test "$(shell $(DATAGEN) -g20KB | $(LZ4) -c --fast=1 | wc -c)" -eq "$(shell $(DATAGEN) -g20KB| $(LZ4) -c --fast| wc -c)" # checks default fast compression is -1 + ! $(LZ4) -c --fast=0 tmp-tlb-dg20k # lz4 should fail when fast=0 (input file exists : failure must come from the option) + ! 
$(LZ4) -c --fast=-1 tmp-tlb-dg20k # lz4 should fail when fast=-1 (input file exists : failure must come from the option) + # High --fast values can result in out-of-bound dereferences #876 + $(DATAGEN) -g1M | $(LZ4) -c --fast=999999999 > /dev/null + # Test for #596 + @echo "TEST" > tmp-tlb-test + $(LZ4) -m tmp-tlb-test + $(LZ4) tmp-tlb-test.lz4 tmp-tlb-test2 + $(DIFF) -q tmp-tlb-test tmp-tlb-test2 + @$(RM) tmp-tlb* + + + +test-lz4-dict: lz4 datagen + @echo "\n ---- test lz4 compression/decompression with dictionary ----" + $(DATAGEN) -g16KB > tmp-dict + $(DATAGEN) -g32KB > tmp-dict-sample-32k + < tmp-dict-sample-32k $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | $(DIFF) - tmp-dict-sample-32k + $(DATAGEN) -g128MB > tmp-dict-sample-128m + < tmp-dict-sample-128m $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | $(DIFF) - tmp-dict-sample-128m + touch tmp-dict-sample-0 + < tmp-dict-sample-0 $(LZ4) -D tmp-dict | $(LZ4) -dD tmp-dict | $(DIFF) - tmp-dict-sample-0 + + < tmp-dict-sample-32k $(LZ4) -D tmp-dict-sample-0 | $(LZ4) -dD tmp-dict-sample-0 | $(DIFF) - tmp-dict-sample-32k + < tmp-dict-sample-0 $(LZ4) -D tmp-dict-sample-0 | $(LZ4) -dD tmp-dict-sample-0 | $(DIFF) - tmp-dict-sample-0 + + @echo "\n ---- test lz4 dictionary loading ----" + $(DATAGEN) -g128KB > tmp-dict-data-128KB + set -e; \ + for l in 0 1 4 128 32767 32768 32769 65535 65536 65537 98303 98304 98305 131071 131072 131073; do \ + $(DATAGEN) -g$$l > tmp-dict-$$l; \ + $(DD) if=tmp-dict-$$l of=tmp-dict-$$l-tail bs=1 count=65536 skip=$$((l > 65536 ? 
l - 65536 : 0)); \ + < tmp-dict-$$l $(LZ4) -D stdin tmp-dict-data-128KB -c | $(LZ4) -dD tmp-dict-$$l-tail | $(DIFF) - tmp-dict-data-128KB; \ + < tmp-dict-$$l-tail $(LZ4) -D stdin tmp-dict-data-128KB -c | $(LZ4) -dD tmp-dict-$$l | $(DIFF) - tmp-dict-data-128KB; \ + done + + @$(RM) tmp-dict* + +test-lz4-hugefile: lz4 datagen + @echo "\n ---- test huge files compression/decompression ----" + $(DATAGEN) -g6GB | $(LZ4) -vB5D | $(LZ4) -qt + $(DATAGEN) -g4500MB | $(LZ4) -v3BD | $(LZ4) -qt + # test large file size [2-4] GB + @$(DATAGEN) -g3G -P100 | $(LZ4) -vv | $(LZ4) --decompress --force --sparse - tmphf1 + @ls -ls tmphf1 + @$(DATAGEN) -g3G -P100 | $(LZ4) --quiet --content-size | $(LZ4) --verbose --decompress --force --sparse - tmphf2 + @ls -ls tmphf2 + $(DIFF) -s tmphf1 tmphf2 + @$(RM) tmphf* + +test-lz4-testmode: lz4 datagen + @echo "\n ---- bench mode ----" + $(LZ4) -bi0 + @echo "\n ---- test mode ----" + ! $(DATAGEN) | $(LZ4) -t + ! $(DATAGEN) | $(LZ4) -tf + @echo "\n ---- pass-through mode ----" + @echo "Why hello there " > tmp-tlt2.lz4 + ! $(LZ4) -f tmp-tlt2.lz4 > $(VOID) + ! $(DATAGEN) | $(LZ4) -dc > $(VOID) + ! $(DATAGEN) | $(LZ4) -df > $(VOID) + $(DATAGEN) | $(LZ4) -dcf > $(VOID) + @echo "Hello World !" > tmp-tlt1 + $(LZ4) -dcf tmp-tlt1 + @echo "from underground..." > tmp-tlt2 + $(LZ4) -dcfm tmp-tlt1 tmp-tlt2 + @echo "\n ---- non-existing source ----" + ! $(LZ4) file-does-not-exist + ! $(LZ4) -f file-does-not-exist + ! $(LZ4) -t file-does-not-exist + ! 
$(LZ4) -fm file1-dne file2-dne + @$(RM) tmp-tlt tmp-tlt1 tmp-tlt2 tmp-tlt2.lz4 + +test-lz4-opt-parser: lz4 datagen + @echo "\n ---- test opt-parser ----" + $(DATAGEN) -g16KB | $(LZ4) -12 | $(LZ4) -t + $(DATAGEN) -P10 | $(LZ4) -12B4 | $(LZ4) -t + $(DATAGEN) -g256K | $(LZ4) -12B4D | $(LZ4) -t + $(DATAGEN) -g512K -P25 | $(LZ4) -12BD | $(LZ4) -t + $(DATAGEN) -g1M | $(LZ4) -12B5 | $(LZ4) -t + $(DATAGEN) -g2M -P99 | $(LZ4) -11B4D | $(LZ4) -t + $(DATAGEN) -g4M | $(LZ4) -11vq | $(LZ4) -qt + $(DATAGEN) -g8M | $(LZ4) -11B4 | $(LZ4) -t + $(DATAGEN) -g16M -P90 | $(LZ4) -11B5 | $(LZ4) -t + $(DATAGEN) -g32M -P10 | $(LZ4) -11B5D | $(LZ4) -t + +test-lz4-essentials : lz4 datagen test-lz4-basic test-lz4-multiple test-lz4-multiple-legacy \ + test-lz4-frame-concatenation test-lz4-testmode \ + test-lz4-contentSize test-lz4-dict + @$(RM) tmp* + +test-lz4: lz4 datagen test-lz4-essentials test-lz4-opt-parser \ + test-lz4-sparse test-lz4-hugefile test-lz4-dict + @$(RM) tmp* + +test-lz4c: lz4c datagen + @echo "\n ---- test lz4c variant ----" + $(DATAGEN) -g256MB | $(LZ4)c -l -v | $(LZ4)c -t + +test-lz4c32: CFLAGS+=-m32 +test-lz4c32: test-lz4 + +test-interop-32-64: lz4 lz4c32 datagen + @echo "\n ---- test interoperability 32-bits -vs- 64 bits ----" + $(DATAGEN) -g16KB | $(LZ4)c32 -9 | $(LZ4) -t + $(DATAGEN) -P10 | $(LZ4) -9B4 | $(LZ4)c32 -t + $(DATAGEN) | $(LZ4)c32 | $(LZ4) -t + $(DATAGEN) -g1M | $(LZ4) -3B5 | $(LZ4)c32 -t + $(DATAGEN) -g256MB | $(LZ4)c32 -vqB4D | $(LZ4) -qt + $(DATAGEN) -g1G -P90 | $(LZ4) | $(LZ4)c32 -t + $(DATAGEN) -g6GB | $(LZ4)c32 -vq9BD | $(LZ4) -qt + +test-lz4c32-basic: lz4c32 datagen + @echo "\n ---- test lz4c32 32-bits version ----" + $(DATAGEN) -g16KB | $(LZ4)c32 -9 | $(LZ4)c32 -t + $(DATAGEN) | $(LZ4)c32 | $(LZ4)c32 -t + $(DATAGEN) -g256MB | $(LZ4)c32 -vqB4D | $(LZ4)c32 -qt + $(DATAGEN) -g6GB | $(LZ4)c32 -vqB5D | $(LZ4)c32 -qt + +test-platform: + @echo "\n ---- test lz4 $(QEMU_SYS) platform ----" + $(QEMU_SYS) $(DATAGEN) -g16KB | $(QEMU_SYS) $(LZ4) -9 | $(QEMU_SYS) 
$(LZ4) -t + $(QEMU_SYS) $(DATAGEN) | $(QEMU_SYS) $(LZ4) | $(QEMU_SYS) $(LZ4) -t + $(QEMU_SYS) $(DATAGEN) -g256MB | $(QEMU_SYS) $(LZ4) -vqB4D | $(QEMU_SYS) $(LZ4) -qt +ifneq ($(QEMU_SYS),qemu-arm-static) + $(QEMU_SYS) $(DATAGEN) -g3GB | $(QEMU_SYS) $(LZ4) -vqB5D | $(QEMU_SYS) $(LZ4) -qt +endif + +test-fullbench: fullbench + ./fullbench --no-prompt $(NB_LOOPS) $(TEST_FILES) + +test-fullbench32: CFLAGS += -m32 +test-fullbench32: test-fullbench + +test-fuzzer: fuzzer + ./fuzzer $(FUZZER_TIME) + +test-fuzzer32: CFLAGS += -m32 +test-fuzzer32: test-fuzzer + +test-frametest: frametest + ./frametest -v $(FUZZER_TIME) + +test-frametest32: CFLAGS += -m32 +test-frametest32: test-frametest + +test-mem: lz4 datagen fuzzer frametest fullbench + @echo "\n ---- valgrind tests : memory analyzer ----" + valgrind --leak-check=yes --error-exitcode=1 $(DATAGEN) -g50M > $(VOID) + $(DATAGEN) -g16KB > ftmdg16K + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -BD -f ftmdg16K $(VOID) + $(DATAGEN) -g16KB -s2 > ftmdg16K2 + $(DATAGEN) -g16KB -s3 > ftmdg16K3 + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) --force --multiple ftmdg16K ftmdg16K2 ftmdg16K3 + $(DATAGEN) -g7MB > ftmdg7M + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -9 -B5D -f ftmdg7M ftmdg16K2 + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -t ftmdg16K2 + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -bi1 ftmdg7M + valgrind --leak-check=yes --error-exitcode=1 ./fullbench -i1 ftmdg7M ftmdg16K2 + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) -B4D -f -vq ftmdg7M $(VOID) + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) --list -m ftm*.lz4 + valgrind --leak-check=yes --error-exitcode=1 $(LZ4) --list -m -v ftm*.lz4 + $(RM) ftm* + valgrind --leak-check=yes --error-exitcode=1 ./fuzzer -i64 -t1 + valgrind --leak-check=yes --error-exitcode=1 ./frametest -i256 + +test-mem32: lz4c32 datagen +# unfortunately, valgrind doesn't seem to work with non-native binary... 
+ +test-decompress-partial : decompress-partial + @echo "\n ---- test decompress-partial ----" + ./decompress-partial$(EXT) + +endif diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 0000000..75b7b9f --- /dev/null +++ b/tests/README.md @@ -0,0 +1,71 @@ +Programs and scripts for automated testing of LZ4 +======================================================= + +This directory contains the following programs and scripts: +- `datagen` : Synthetic and parameterizable data generator, for tests +- `frametest` : Test tool that checks lz4frame integrity on target platform +- `fullbench` : Precisely measure speed for each lz4 inner function +- `fuzzer` : Test tool, to check lz4 integrity on target platform +- `test-lz4-speed.py` : script for testing lz4 speed difference between commits +- `test-lz4-versions.py` : compatibility test between lz4 versions stored on GitHub + + +#### `test-lz4-versions.py` - script for testing lz4 interoperability between versions + +This script creates `versionsTest` directory to which lz4 repository is cloned. +Then all tagged (released) versions of lz4 are compiled. +In the following step interoperability between lz4 versions is checked. + + +#### `test-lz4-speed.py` - script for testing lz4 speed difference between commits + +This script creates `speedTest` directory to which lz4 repository is cloned. +Then it compiles all branches of lz4 and performs a speed benchmark for a given list of files (the `testFileNames` parameter). +After `sleepTime` (an optional parameter, default 300 seconds) seconds the script checks repository for new commits. +If a new commit is found it is compiled and a speed benchmark for this commit is performed. +The results of the speed benchmark are compared to the previous results. +If compression or decompression speed for one of lz4 levels is lower than `lowerLimit` (an optional parameter, default 0.98) the speed benchmark is restarted. 
+If the second results are also lower than `lowerLimit`, a warning e-mail is sent to the recipients from the list (the `emails` parameter). + +Additional remarks: +- To be sure that speed results are accurate the script should be run on a "stable" target system with no other jobs running in parallel +- Using the script with virtual machines can lead to large variations of speed results +- The speed benchmark is not performed until the computer's load average is lower than `maxLoadAvg` (an optional parameter, default 0.75) +- The script sends e-mails using `mutt`; if `mutt` is not available it sends e-mails without attachments using `mail`; if both are not available it only prints a warning + + +The example usage with two test files, one e-mail address, and with an additional message: +``` +./test-lz4-speed.py "silesia.tar calgary.tar" "email@gmail.com" --message "tested on my laptop" --sleepTime 60 +``` + +To run the script in background please use: +``` +nohup ./test-lz4-speed.py testFileNames emails & +``` + +The full list of parameters: +``` +positional arguments: + testFileNames file names list for speed benchmark + emails list of e-mail addresses to send warnings + +optional arguments: + -h, --help show this help message and exit + --message MESSAGE attach an additional message to e-mail + --lowerLimit LOWERLIMIT + send email if speed is lower than given limit + --maxLoadAvg MAXLOADAVG + maximum load average to start testing + --lastCLevel LASTCLEVEL + last compression level for testing + --sleepTime SLEEPTIME + frequency of repository checking in seconds +``` + + +#### License + +All files in this directory are licensed under GPL-v2. +See [COPYING](COPYING) for details. +The text of the license is also included at the top of each source file. 
diff --git a/tests/checkFrame.c b/tests/checkFrame.c new file mode 100644 index 0000000..f9a1c14 --- /dev/null +++ b/tests/checkFrame.c @@ -0,0 +1,303 @@ + /* + checkFrame - verify frame headers + Copyright (C) Yann Collet 2014-present + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 + */ + + /*-************************************ + * Includes + **************************************/ + #include "util.h" /* U32 */ + #include /* malloc, free */ + #include /* fprintf */ + #include /* strcmp */ + #include /* clock_t, clock(), CLOCKS_PER_SEC */ + #include + #include "lz4frame.h" /* include multiple times to test correctness/safety */ + #include "lz4frame.h" + #define LZ4F_STATIC_LINKING_ONLY + #include "lz4frame.h" + #include "lz4frame.h" + #include "lz4.h" /* LZ4_VERSION_STRING */ + #define XXH_STATIC_LINKING_ONLY + #include "xxhash.h" /* XXH64 */ + + + /*-************************************ + * Constants + **************************************/ + #define KB *(1U<<10) + #define MB *(1U<<20) + #define GB *(1U<<30) + + + /*-************************************ + * Macros + **************************************/ + #define DISPLAY(...) 
fprintf(stderr, __VA_ARGS__) + #define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } + + /************************************** + * Exceptions + ***************************************/ + #ifndef DEBUG + # define DEBUG 0 + #endif + #define DEBUGOUTPUT(...) if (DEBUG) DISPLAY(__VA_ARGS__); + #define EXM_THROW(error, ...) \ +{ \ + DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \ + DISPLAYLEVEL(1, "Error %i : ", error); \ + DISPLAYLEVEL(1, __VA_ARGS__); \ + DISPLAYLEVEL(1, " \n"); \ + return(error); \ +} + + + +/*-*************************************** +* Local Parameters +*****************************************/ +static U32 no_prompt = 0; +static U32 displayLevel = 2; +static U32 use_pause = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) + +typedef struct { + void* srcBuffer; + size_t srcBufferSize; + void* dstBuffer; + size_t dstBufferSize; + LZ4F_decompressionContext_t ctx; +} cRess_t; + +static int createCResources(cRess_t* ress) +{ + ress->srcBufferSize = 4 MB; + ress->srcBuffer = malloc(ress->srcBufferSize); + ress->dstBufferSize = 4 MB; + ress->dstBuffer = malloc(ress->dstBufferSize); + + if (!ress->srcBuffer || !ress->dstBuffer) { + free(ress->srcBuffer); + free(ress->dstBuffer); + EXM_THROW(20, "Allocation error : not enough memory"); + } + + if (LZ4F_isError( LZ4F_createDecompressionContext(&(ress->ctx), LZ4F_VERSION) )) { + free(ress->srcBuffer); + free(ress->dstBuffer); + EXM_THROW(21, "Unable to create decompression context"); + } + return 0; +} + +static void freeCResources(cRess_t ress) +{ + free(ress.srcBuffer); + free(ress.dstBuffer); + + (void) LZ4F_freeDecompressionContext(ress.ctx); +} + +int frameCheck(cRess_t ress, FILE* const srcFile, unsigned bsid, size_t blockSize) +{ + LZ4F_errorCode_t nextToLoad = 0; + size_t 
curblocksize = 0; + int partialBlock = 0; + + /* Main Loop */ + for (;;) { + size_t readSize; + size_t pos = 0; + size_t decodedBytes = ress.dstBufferSize; + size_t remaining; + LZ4F_frameInfo_t frameInfo; + + /* Read input */ + readSize = fread(ress.srcBuffer, 1, ress.srcBufferSize, srcFile); + if (!readSize) break; /* reached end of file or stream */ + + while (pos < readSize) { /* still to read */ + /* Decode Input (at least partially) */ + if (!nextToLoad) { + /* LZ4F_decompress returned 0 : starting new frame */ + curblocksize = 0; + remaining = readSize - pos; + nextToLoad = LZ4F_getFrameInfo(ress.ctx, &frameInfo, (char*)(ress.srcBuffer)+pos, &remaining); + if (LZ4F_isError(nextToLoad)) + EXM_THROW(22, "Error getting frame info: %s", + LZ4F_getErrorName(nextToLoad)); + if (frameInfo.blockSizeID != bsid) + EXM_THROW(23, "Block size ID %u != expected %u", + frameInfo.blockSizeID, bsid); + pos += remaining; + /* nextToLoad should be block header size */ + remaining = nextToLoad; + decodedBytes = ress.dstBufferSize; + nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad)); + pos += remaining; + } + decodedBytes = ress.dstBufferSize; + /* nextToLoad should be just enough to cover the next block */ + if (nextToLoad > (readSize - pos)) { + /* block is not fully contained in current buffer */ + partialBlock = 1; + remaining = readSize - pos; + } else { + if (partialBlock) { + partialBlock = 0; + } + remaining = nextToLoad; + } + nextToLoad = LZ4F_decompress(ress.ctx, ress.dstBuffer, &decodedBytes, (char*)(ress.srcBuffer)+pos, &remaining, NULL); + if (LZ4F_isError(nextToLoad)) EXM_THROW(24, "Decompression error : %s", LZ4F_getErrorName(nextToLoad)); + curblocksize += decodedBytes; + pos += remaining; + if (!partialBlock) { + /* detect small block due to end of frame; the final 4-byte frame checksum 
could be left in the buffer */ + if ((curblocksize != 0) && (nextToLoad > 4)) { + if (curblocksize != blockSize) + EXM_THROW(25, "Block size %u != expected %u, pos %u\n", + (unsigned)curblocksize, (unsigned)blockSize, (unsigned)pos); + } + curblocksize = 0; + } + } + } + /* can be out because readSize == 0, which could be an fread() error */ + if (ferror(srcFile)) EXM_THROW(26, "Read error"); + + if (nextToLoad!=0) EXM_THROW(27, "Unfinished stream"); + + return 0; +} + +int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [args] filename\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -b# : expected blocksizeID [4-7] (required)\n"); + DISPLAY( " -B# : expected blocksize [32-4194304] (required)\n"); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, const char** argv) +{ + int argNb; + unsigned bsid=0; + size_t blockSize=0; + const char* const programName = argv[0]; + + /* Check command line */ + for (argNb=1; argNb ='0') && (*argument<='9')) { + bsid *= 10; + bsid += (unsigned)(*argument - '0'); + argument++; + } + break; + + case 'B': + argument++; + blockSize=0; + while ((*argument>='0') && (*argument<='9')) { + blockSize *= 10; + blockSize += (size_t)(*argument - '0'); + argument++; + } + break; + + default: + ; + return FUZ_usage(programName); + } + } + } else { + int err; + FILE *srcFile; + cRess_t ress; + if (bsid == 0 || blockSize == 0) + return FUZ_usage(programName); + DISPLAY("Starting frame checker (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION_STRING); + err = createCResources(&ress); + if (err) return (err); + srcFile = fopen(argument, "rb"); + if ( srcFile==NULL ) { + freeCResources(ress); + EXM_THROW(1, "%s: %s \n", argument, strerror(errno)); + } + assert (srcFile != NULL); + err = frameCheck(ress, srcFile, bsid, blockSize); + freeCResources(ress); + fclose(srcFile); + return (err); + } + } + return 0; +} diff --git 
a/tests/checkTag.c b/tests/checkTag.c new file mode 100644 index 0000000..4a33415 --- /dev/null +++ b/tests/checkTag.c @@ -0,0 +1,79 @@ +/* + checkTag.c - Version validation tool for LZ4 + Copyright (C) Yann Collet 2018 - present + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repo : https://github.com/lz4/lz4 +*/ + +/* checkTag command : + * $ ./checkTag tag + * checkTag validates tags of following format : v[0-9].[0-9].[0-9]{any} + * The tag is then compared to LZ4 version number. + * They are compatible if first 3 digits are identical. + * Anything beyond that is free, and doesn't impact validation. + * Example : tag v1.8.1.2 is compatible with version 1.8.1 + * When tag and version are not compatible, program exits with error code 1. + * When they are compatible, it exists with a code 0. + * checkTag is intended to be used in automated testing environment. + */ + +#include /* printf */ +#include /* strlen, strncmp */ +#include "lz4.h" /* LZ4_VERSION_STRING */ + + +/* validate() : + * @return 1 if tag is compatible, 0 if not. 
+ */ +static int validate(const char* const tag) +{ + size_t const tagLength = strlen(tag); + size_t const verLength = strlen(LZ4_VERSION_STRING); + + if (tagLength < 2) return 0; + if (tag[0] != 'v') return 0; + if (tagLength <= verLength) return 0; + + if (strncmp(LZ4_VERSION_STRING, tag+1, verLength)) return 0; + + return 1; +} + +int main(int argc, const char** argv) +{ + const char* const exeName = argv[0]; + const char* const tag = argv[1]; + if (argc!=2) { + printf("incorrect usage : %s tag \n", exeName); + return 2; + } + + printf("Version : %s \n", LZ4_VERSION_STRING); + printf("Tag : %s \n", tag); + + if (validate(tag)) { + printf("OK : tag is compatible with lz4 version \n"); + return 0; + } + + printf("!! error : tag and versions are not compatible !! \n"); + return 1; +} diff --git a/tests/datagencli.c b/tests/datagencli.c new file mode 100644 index 0000000..c985197 --- /dev/null +++ b/tests/datagencli.c @@ -0,0 +1,172 @@ +/* + datagencli.c + compressible data command line generator + Copyright (C) Yann Collet 2012-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - Public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +/************************************** +* Includes +**************************************/ +#include "util.h" /* U32 */ +#include /* fprintf, stderr */ +#include "datagen.h" /* RDG_generate */ +#include "lz4.h" /* LZ4_VERSION_STRING */ + + +/************************************** +* Constants +**************************************/ +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define SIZE_DEFAULT (64 KB) +#define SEED_DEFAULT 0 +#define COMPRESSIBILITY_DEFAULT 50 + + +/************************************** +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static unsigned displayLevel = 2; + + +/********************************************************* +* Command line +*********************************************************/ +static int usage(char* programName) +{ + DISPLAY( "Compressible data generator\n"); + DISPLAY( "Usage :\n"); + DISPLAY( " %s [size] [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -g# : generate # data (default:%i)\n", SIZE_DEFAULT); + DISPLAY( " -s# : Select seed (default:%i)\n", SEED_DEFAULT); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", COMPRESSIBILITY_DEFAULT); + DISPLAY( " -h : display help and exit\n"); + DISPLAY( "Special values :\n"); + DISPLAY( " -P0 : generate incompressible noise\n"); + DISPLAY( " -P100 : generate sparse files\n"); + return 0; +} + + +int main(int argc, char** argv) +{ + int argNb; + double proba = (double)COMPRESSIBILITY_DEFAULT / 100; + double litProba = 0.0; + U64 size = SIZE_DEFAULT; + U32 seed = SEED_DEFAULT; + char* programName; + + /* Check command line */ + programName = argv[0]; + for(argNb=1; argNb ='0') && (*argument<='9')) + { + 
size *= 10; + size += *argument - '0'; + argument++; + } + if (*argument=='K') { size <<= 10; argument++; } + if (*argument=='M') { size <<= 20; argument++; } + if (*argument=='G') { size <<= 30; argument++; } + if (*argument=='B') { argument++; } + break; + case 's': + argument++; + seed=0; + while ((*argument>='0') && (*argument<='9')) + { + seed *= 10; + seed += *argument - '0'; + argument++; + } + break; + case 'P': + argument++; + proba=0.0; + while ((*argument>='0') && (*argument<='9')) + { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba>100.) proba=100.; + proba /= 100.; + break; + case 'L': /* hidden argument : Literal distribution probability */ + argument++; + litProba=0.; + while ((*argument>='0') && (*argument<='9')) + { + litProba *= 10; + litProba += *argument - '0'; + argument++; + } + if (litProba>100.) litProba=100.; + litProba /= 100.; + break; + case 'v': + displayLevel = 4; + argument++; + break; + default: + return usage(programName); + } + } + + } + } + + DISPLAYLEVEL(4, "Data Generator %s \n", LZ4_VERSION_STRING); + DISPLAYLEVEL(3, "Seed = %u \n", seed); + /* NOTE(review): proba is a fraction at this point (the -P branch above clamps to 100 and then divides by 100) while COMPRESSIBILITY_DEFAULT is the percentage 50, so this comparison looks always true - confirm intent. */ if (proba!=COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100)); + + RDG_genOut(size, proba, litProba, seed); + DISPLAYLEVEL(1, "\n"); + + return 0; +} diff --git a/tests/decompress-partial.c b/tests/decompress-partial.c new file mode 100644 index 0000000..4e124b7 --- /dev/null +++ b/tests/decompress-partial.c @@ -0,0 +1,49 @@ +#include "stdio.h" +#include "string.h" +#include "lz4.h" + +/* source : sample text compressed and then decompressed by main(). */ const char source[] = + "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod\n" + "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim\n" + "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea\n" + "commodo consequat. Duis aute irure dolor in reprehenderit in voluptate\n" + "velit esse cillum dolore eu fugiat nulla pariatur. 
Excepteur sint occaecat\n" + "cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id\n" + "est laborum.\n" + "\n" + "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium\n" + "doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore\n" + "veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim\n" + "ipsam voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia\n" + "consequuntur magni dolores eos qui ratione voluptatem sequi nesciunt. Neque\n" + "porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur,\n" + "adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore\n" + "et dolore magnam aliquam quaerat voluptatem. Ut enim ad minima veniam, quis\n" + "nostrum exercitationem ullam corporis suscipit laboriosam, nisi ut aliquid\n" + "ex ea commodi consequatur? Quis autem vel eum iure reprehenderit qui in ea\n" + "voluptate velit esse quam nihil molestiae consequatur, vel illum qui\n" + "dolorem eum fugiat quo voluptas nulla pariatur?\n"; + +/* BUFFER_SIZE : capacity in bytes of both the compressed and the decoded stack buffers used by main(). */ #define BUFFER_SIZE 2048 + +/* main() : compresses source with LZ4_compress_default, then calls LZ4_decompress_safe_partial ten times with a declared input size i running from cmpSize to cmpSize+9, each time requesting srcLen bytes into outBuffer. Every call must return srcLen and reproduce source exactly; otherwise the error line is printed and -1 is returned. Prints the OK line and returns 0 when all ten calls pass. NOTE(review): cmpSize is used without checking whether compression failed - confirm that cannot happen for this input. */ int main(void) +{ + int srcLen = (int)strlen(source); + char cmpBuffer[BUFFER_SIZE]; + char outBuffer[BUFFER_SIZE]; + int cmpSize; + int i; + + cmpSize = LZ4_compress_default(source, cmpBuffer, srcLen, BUFFER_SIZE); + + for (i = cmpSize; i < cmpSize + 10; ++i) { + int result = LZ4_decompress_safe_partial(cmpBuffer, outBuffer, i, srcLen, BUFFER_SIZE); + if ((result < 0) || (result != srcLen) || memcmp(source, outBuffer, srcLen)) { + printf("test decompress-partial error \n"); + return -1; + } + } + + printf("test decompress-partial OK \n"); + return 0; +} diff --git a/tests/frametest.c b/tests/frametest.c new file mode 100644 index 0000000..e613cbf --- /dev/null +++ b/tests/frametest.c @@ -0,0 +1,1281 @@ +/* + frameTest - test tool for lz4frame + Copyright (C) Yann Collet 2014-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under 
the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repository : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Compiler specific +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 26451) /* disable: Arithmetic overflow */ +#endif + + +/*-************************************ +* Includes +**************************************/ +#include "util.h" /* U32 */ +#include /* malloc, free */ +#include /* fprintf */ +#include /* strcmp */ +#include /* clock_t, clock(), CLOCKS_PER_SEC */ +#include +#include "lz4frame.h" /* included multiple times to test correctness/safety */ +#include "lz4frame.h" +#define LZ4F_STATIC_LINKING_ONLY +#include "lz4frame.h" +#include "lz4frame.h" +#define LZ4_STATIC_LINKING_ONLY /* LZ4_DISTANCE_MAX */ +#include "lz4.h" /* LZ4_VERSION_STRING */ +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" /* XXH64 */ + + +/* FUZ_writeLE32() : stores value32 at dstVoidPtr as 4 consecutive bytes, least-significant byte first. Unoptimized byte-by-byte version; solves endianness & alignment issues. */ +static void FUZ_writeLE32 (void* dstVoidPtr, U32 value32) +{ + BYTE* dstPtr = (BYTE*)dstVoidPtr; + dstPtr[0] = (BYTE) value32; + dstPtr[1] = (BYTE)(value32 >> 8); + dstPtr[2] = (BYTE)(value32 >> 16); + dstPtr[3] = (BYTE)(value32 >> 24); +} + + +/*-************************************ +* Constants +**************************************/ 
+#define LZ4F_MAGIC_SKIPPABLE_START 0x184D2A50U + +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + +static const U32 nbTestsDefault = 256 KB; +#define FUZ_COMPRESSIBILITY_DEFAULT 50 +static const U32 prime1 = 2654435761U; +static const U32 prime2 = 2246822519U; + + +/*-************************************ +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); } +/* DISPLAYUPDATE : like DISPLAYLEVEL, but prints only when more than refreshRate clock ticks have elapsed since the previous print, or on every call when displayLevel>=4. NOTE(review): DISPLAY writes to stderr while the flush below targets stdout - confirm which stream was intended. */ #define DISPLAYUPDATE(l, ...) if (displayLevel>=l) { \ + if ((FUZ_GetClockSpan(g_clockTime) > refreshRate) || (displayLevel>=4)) \ + { g_clockTime = clock(); DISPLAY(__VA_ARGS__); \ + if (displayLevel>=4) fflush(stdout); } } +static const clock_t refreshRate = CLOCKS_PER_SEC / 6; +static clock_t g_clockTime = 0; + + +/*-*************************************** +* Local Parameters +*****************************************/ +static U32 no_prompt = 0; +static U32 displayLevel = 2; +static U32 use_pause = 0; + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) +#define MAX(a,b) ( (a) > (b) ? (a) : (b) ) + +/* FUZ_GetClockSpan() : returns the number of clock() ticks elapsed since clockStart. */ static clock_t FUZ_GetClockSpan(clock_t clockStart) +{ + return clock() - clockStart; /* works even if overflow; max span ~ 30 mn */ +} + + +#define FUZ_rotl32(x,r) ((x << r) | (x >> (32 - r))) +/* FUZ_rand() : advances the 32-bit generator state stored at *src (multiply by prime1, add prime2, rotate left by 13 bits), writes the new state back to *src, and returns that state shifted right by 5 bits. */ unsigned int FUZ_rand(unsigned int* src) +{ + U32 rand32 = *src; + rand32 *= prime1; + rand32 += prime2; + rand32 = FUZ_rotl32(rand32, 13); + *src = rand32; + return rand32 >> 5; +} + + +#define FUZ_RAND15BITS (FUZ_rand(seed) & 0x7FFF) +#define FUZ_RANDLENGTH ( (FUZ_rand(seed) & 3) ? 
(FUZ_rand(seed) % 15) : (FUZ_rand(seed) % 510) + 15) +/* FUZ_fillCompressibleNoiseBuffer() : fills buffer[0 .. bufferSize-1] with pseudo-random content driven by *seed. The first byte is random and is written before any size check, so bufferSize must be at least 1. After that, each round either copies a run from earlier in the buffer (chosen when a 15-bit random draw is below 32768*proba; the source starts 1 to 32768 bytes back, clipped to pos) or emits a run of random bytes. Run lengths are FUZ_RANDLENGTH + 4, clipped to the space left. */ static void FUZ_fillCompressibleNoiseBuffer(void* buffer, size_t bufferSize, double proba, U32* seed) +{ + BYTE* BBuffer = (BYTE*)buffer; + size_t pos = 0; + U32 P32 = (U32)(32768 * proba); + + /* First Byte */ + BBuffer[pos++] = (BYTE)(FUZ_rand(seed)); + + while (pos < bufferSize) { + /* Select : Literal (noise) or copy (from at most 32768 bytes back) */ + if (FUZ_RAND15BITS < P32) { + /* Copy (from at most 32768 bytes back; byte-by-byte, so source and destination may overlap) */ + size_t const lengthRand = FUZ_RANDLENGTH + 4; + size_t const length = MIN(lengthRand, bufferSize - pos); + size_t const end = pos + length; + size_t const offsetRand = FUZ_RAND15BITS + 1; + size_t const offset = MIN(offsetRand, pos); + size_t match = pos - offset; + while (pos < end) BBuffer[pos++] = BBuffer[match++]; + } else { + /* Literal (noise) */ + size_t const lengthRand = FUZ_RANDLENGTH + 4; + size_t const length = MIN(lengthRand, bufferSize - pos); + size_t const end = pos + length; + while (pos < end) BBuffer[pos++] = (BYTE)(FUZ_rand(seed) >> 5); + } } +} + + +/* FUZ_highbit() : returns the 1-based position of the highest set bit of v32, i.e. how many right shifts it takes to reach zero; returns 0 when v32 is 0. */ static unsigned FUZ_highbit(U32 v32) +{ + unsigned nbBits = 0; + if (v32==0) return 0; + while (v32) {v32 >>= 1; nbBits ++;} + return nbBits; +} + + +/*-******************************************************* +* Tests +*********************************************************/ +/* CHECK_V : assigns f to v; when LZ4F_isError(v) is true, prints the error name to stderr and jumps to the _output_error label of the enclosing function. CHECK : same, using a block-scoped throw-away variable. */ #define CHECK_V(v,f) v = f; if (LZ4F_isError(v)) { fprintf(stderr, "%s \n", LZ4F_getErrorName(v)); goto _output_error; } +#define CHECK(f) { LZ4F_errorCode_t const CHECK_V(err_ , f); } + +int basicTests(U32 seed, double compressibility) +{ +#define COMPRESSIBLE_NOISE_LENGTH (2 MB) + void* const CNBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); + size_t const cBuffSize = LZ4F_compressFrameBound(COMPRESSIBLE_NOISE_LENGTH, NULL); + void* const compressedBuffer = malloc(cBuffSize); + void* const decodedBuffer = malloc(COMPRESSIBLE_NOISE_LENGTH); + U32 randState = seed; + size_t cSize, testSize; + LZ4F_decompressionContext_t dCtx = NULL; + LZ4F_compressionContext_t cctx = NULL; + U64 crcOrig; 
+ int basicTests_error = 0; + LZ4F_preferences_t prefs; + memset(&prefs, 0, sizeof(prefs)); + + if (!CNBuffer || !compressedBuffer || !decodedBuffer) { + DISPLAY("allocation error, not enough memory to start fuzzer tests \n"); + goto _output_error; + } + FUZ_fillCompressibleNoiseBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, &randState); + crcOrig = XXH64(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, 1); + + /* LZ4F_compressBound() : special case : srcSize == 0 */ + DISPLAYLEVEL(3, "LZ4F_compressBound(0) = "); + { size_t const cBound = LZ4F_compressBound(0, NULL); + if (cBound < 64 KB) goto _output_error; + DISPLAYLEVEL(3, " %u \n", (U32)cBound); + } + + /* LZ4F_compressBound() : special case : automatic flushing enabled */ + DISPLAYLEVEL(3, "LZ4F_compressBound(1 KB, autoFlush=1) = "); + { size_t cBound; + LZ4F_preferences_t autoFlushPrefs; + memset(&autoFlushPrefs, 0, sizeof(autoFlushPrefs)); + autoFlushPrefs.autoFlush = 1; + cBound = LZ4F_compressBound(1 KB, &autoFlushPrefs); + if (cBound > 64 KB) goto _output_error; + DISPLAYLEVEL(3, " %u \n", (U32)cBound); + } + + /* LZ4F_compressBound() : special case : automatic flushing disabled */ + DISPLAYLEVEL(3, "LZ4F_compressBound(1 KB, autoFlush=0) = "); + { size_t const cBound = LZ4F_compressBound(1 KB, &prefs); + if (cBound < 64 KB) goto _output_error; + DISPLAYLEVEL(3, " %u \n", (U32)cBound); + } + + /* Special case : null-content frame */ + testSize = 0; + DISPLAYLEVEL(3, "LZ4F_compressFrame, compress null content : "); + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, NULL)); + DISPLAYLEVEL(3, "null content encoded into a %u bytes frame \n", (unsigned)cSize); + + DISPLAYLEVEL(3, "LZ4F_createDecompressionContext \n"); + CHECK ( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) ); + + DISPLAYLEVEL(3, "LZ4F_getFrameInfo on null-content frame (#157) \n"); + assert(cSize >= LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH); + { LZ4F_frameInfo_t frame_info; + 
size_t const fhs = LZ4F_headerSize(compressedBuffer, LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH); + size_t avail_in = fhs; + CHECK( fhs ); + CHECK( LZ4F_getFrameInfo(dCtx, &frame_info, compressedBuffer, &avail_in) ); + if (avail_in != fhs) goto _output_error; /* must consume all, since header size is supposed to be exact */ + } + + DISPLAYLEVEL(3, "LZ4F_freeDecompressionContext \n"); + CHECK( LZ4F_freeDecompressionContext(dCtx) ); + dCtx = NULL; + + /* test one-pass frame compression */ + testSize = COMPRESSIBLE_NOISE_LENGTH; + + DISPLAYLEVEL(3, "LZ4F_compressFrame, using fast level -3 : "); + { LZ4F_preferences_t fastCompressPrefs; + memset(&fastCompressPrefs, 0, sizeof(fastCompressPrefs)); + fastCompressPrefs.compressionLevel = -3; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, &fastCompressPrefs)); + DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize); + } + + DISPLAYLEVEL(3, "LZ4F_compressFrame, using default preferences : "); + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, NULL), CNBuffer, testSize, NULL)); + DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize); + + DISPLAYLEVEL(3, "Decompression test : \n"); + { size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH; + size_t compressedBufferSize = cSize; + + CHECK( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) ); + + DISPLAYLEVEL(3, "Single Pass decompression : "); + CHECK( LZ4F_decompress(dCtx, decodedBuffer, &decodedBufferSize, compressedBuffer, &compressedBufferSize, NULL) ); + { U64 const crcDest = XXH64(decodedBuffer, decodedBufferSize, 1); + if (crcDest != crcOrig) goto _output_error; } + DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedBufferSize); + + DISPLAYLEVEL(3, "Reusing decompression context \n"); + { size_t const missingBytes = 4; + size_t iSize = compressedBufferSize - missingBytes; + const BYTE* cBuff = 
(const BYTE*) compressedBuffer; + BYTE* const ostart = (BYTE*)decodedBuffer; + BYTE* op = ostart; + BYTE* const oend = (BYTE*)decodedBuffer + COMPRESSIBLE_NOISE_LENGTH; + size_t decResult, oSize = COMPRESSIBLE_NOISE_LENGTH; + DISPLAYLEVEL(3, "Missing last %u bytes : ", (U32)missingBytes); + CHECK_V(decResult, LZ4F_decompress(dCtx, op, &oSize, cBuff, &iSize, NULL)); + if (decResult != missingBytes) { + DISPLAY("%u bytes missing != %u bytes requested \n", (U32)missingBytes, (U32)decResult); + goto _output_error; + } + DISPLAYLEVEL(3, "indeed, requests %u bytes \n", (unsigned)decResult); + cBuff += iSize; + iSize = decResult; + op += oSize; + oSize = (size_t)(oend-op); + decResult = LZ4F_decompress(dCtx, op, &oSize, cBuff, &iSize, NULL); + if (decResult != 0) goto _output_error; /* should finish now */ + op += oSize; + if (op>oend) { DISPLAY("decompression write overflow \n"); goto _output_error; } + { U64 const crcDest = XXH64(decodedBuffer, (size_t)(op-ostart), 1); + if (crcDest != crcOrig) goto _output_error; + } } + + { size_t oSize = 0; + size_t iSize = 0; + LZ4F_frameInfo_t fi; + const BYTE* ip = (BYTE*)compressedBuffer; + + DISPLAYLEVEL(3, "Start by feeding 0 bytes, to get next input size : "); + CHECK( LZ4F_decompress(dCtx, NULL, &oSize, ip, &iSize, NULL) ); + //DISPLAYLEVEL(3, " %u \n", (unsigned)errorCode); + DISPLAYLEVEL(3, " OK \n"); + + DISPLAYLEVEL(3, "LZ4F_getFrameInfo on zero-size input : "); + { size_t nullSize = 0; + size_t const fiError = LZ4F_getFrameInfo(dCtx, &fi, ip, &nullSize); + if (LZ4F_getErrorCode(fiError) != LZ4F_ERROR_frameHeader_incomplete) { + DISPLAYLEVEL(3, "incorrect error : %s != ERROR_frameHeader_incomplete \n", + LZ4F_getErrorName(fiError)); + goto _output_error; + } + DISPLAYLEVEL(3, " correctly failed : %s \n", LZ4F_getErrorName(fiError)); + } + + DISPLAYLEVEL(3, "LZ4F_getFrameInfo on not enough input : "); + { size_t inputSize = 6; + size_t const fiError = LZ4F_getFrameInfo(dCtx, &fi, ip, &inputSize); + if 
(LZ4F_getErrorCode(fiError) != LZ4F_ERROR_frameHeader_incomplete) { + DISPLAYLEVEL(3, "incorrect error : %s != ERROR_frameHeader_incomplete \n", LZ4F_getErrorName(fiError)); + goto _output_error; + } + DISPLAYLEVEL(3, " correctly failed : %s \n", LZ4F_getErrorName(fiError)); + } + + DISPLAYLEVEL(3, "LZ4F_getFrameInfo on enough input : "); + iSize = LZ4F_headerSize(ip, LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH); + CHECK( iSize ); + CHECK( LZ4F_getFrameInfo(dCtx, &fi, ip, &iSize) ); + DISPLAYLEVEL(3, " correctly decoded \n"); + } + + DISPLAYLEVEL(3, "Decode a buggy input : "); + assert(COMPRESSIBLE_NOISE_LENGTH > 64); + assert(cSize > 48); + memcpy(decodedBuffer, (char*)compressedBuffer+16, 32); /* save correct data */ + memcpy((char*)compressedBuffer+16, (const char*)decodedBuffer+32, 32); /* insert noise */ + { size_t dbSize = COMPRESSIBLE_NOISE_LENGTH; + size_t cbSize = cSize; + size_t const decompressError = LZ4F_decompress(dCtx, decodedBuffer, &dbSize, + compressedBuffer, &cbSize, + NULL); + if (!LZ4F_isError(decompressError)) goto _output_error; + DISPLAYLEVEL(3, "error detected : %s \n", LZ4F_getErrorName(decompressError)); + } + memcpy((char*)compressedBuffer+16, decodedBuffer, 32); /* restore correct data */ + + DISPLAYLEVEL(3, "Reset decompression context, since it's left in error state \n"); + LZ4F_resetDecompressionContext(dCtx); /* always successful */ + + DISPLAYLEVEL(3, "Byte after byte : "); + { BYTE* const ostart = (BYTE*)decodedBuffer; + BYTE* op = ostart; + BYTE* const oend = (BYTE*)decodedBuffer + COMPRESSIBLE_NOISE_LENGTH; + const BYTE* ip = (const BYTE*) compressedBuffer; + const BYTE* const iend = ip + cSize; + while (ip < iend) { + size_t oSize = (size_t)(oend-op); + size_t iSize = 1; + CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) ); + op += oSize; + ip += iSize; + } + { U64 const crcDest = XXH64(decodedBuffer, COMPRESSIBLE_NOISE_LENGTH, 1); + if (crcDest != crcOrig) goto _output_error; + } + DISPLAYLEVEL(3, "Regenerated %u/%u bytes 
\n", (unsigned)(op-ostart), (unsigned)COMPRESSIBLE_NOISE_LENGTH); + } + } + + DISPLAYLEVEL(3, "Using 64 KB block : "); + prefs.frameInfo.blockSizeID = LZ4F_max64KB; + prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs)); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + + DISPLAYLEVEL(3, "without checksum : "); + prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs)); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + + DISPLAYLEVEL(3, "Using 256 KB block : "); + prefs.frameInfo.blockSizeID = LZ4F_max256KB; + prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs)); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + + DISPLAYLEVEL(3, "Decompression test : \n"); + { size_t const decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH; + unsigned const maxBits = FUZ_highbit((U32)decodedBufferSize); + BYTE* const ostart = (BYTE*)decodedBuffer; + BYTE* op = ostart; + BYTE* const oend = ostart + COMPRESSIBLE_NOISE_LENGTH; + const BYTE* ip = (const BYTE*)compressedBuffer; + const BYTE* const iend = (const BYTE*)compressedBuffer + cSize; + + DISPLAYLEVEL(3, "random segment sizes : "); + while (ip < iend) { + unsigned const nbBits = FUZ_rand(&randState) % maxBits; + size_t iSize = (FUZ_rand(&randState) & ((1< (size_t)(iend-ip)) iSize = (size_t)(iend-ip); + CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) ); + op += oSize; + ip += iSize; + } + { size_t const decodedSize = (size_t)(op - ostart); + U64 const crcDest = XXH64(decodedBuffer, 
decodedSize, 1); + if (crcDest != crcOrig) goto _output_error; + DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize); + } + + CHECK( LZ4F_freeDecompressionContext(dCtx) ); + dCtx = NULL; + } + + DISPLAYLEVEL(3, "without checksum : "); + prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) ); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + + DISPLAYLEVEL(3, "Using 1 MB block : "); + prefs.frameInfo.blockSizeID = LZ4F_max1MB; + prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) ); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + + DISPLAYLEVEL(3, "without frame checksum : "); + prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) ); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + + DISPLAYLEVEL(3, "Using 4 MB block : "); + prefs.frameInfo.blockSizeID = LZ4F_max4MB; + prefs.frameInfo.contentChecksumFlag = LZ4F_contentChecksumEnabled; + { size_t const dstCapacity = LZ4F_compressFrameBound(testSize, &prefs); + DISPLAYLEVEL(4, "dstCapacity = %u ; ", (U32)dstCapacity) + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, dstCapacity, CNBuffer, testSize, &prefs) ); + DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize); + } + + DISPLAYLEVEL(3, "without frame checksum : "); + prefs.frameInfo.contentChecksumFlag = LZ4F_noContentChecksum; + { size_t const dstCapacity = LZ4F_compressFrameBound(testSize, &prefs); + DISPLAYLEVEL(4, "dstCapacity = %u ; ", (U32)dstCapacity) + CHECK_V(cSize, 
LZ4F_compressFrame(compressedBuffer, dstCapacity, CNBuffer, testSize, &prefs) ); + DISPLAYLEVEL(3, "Compressed %u bytes into a %u bytes frame \n", (U32)testSize, (U32)cSize); + } + + DISPLAYLEVEL(3, "LZ4F_compressFrame with block checksum : "); + memset(&prefs, 0, sizeof(prefs)); + prefs.frameInfo.blockChecksumFlag = LZ4F_blockChecksumEnabled; + CHECK_V(cSize, LZ4F_compressFrame(compressedBuffer, LZ4F_compressFrameBound(testSize, &prefs), CNBuffer, testSize, &prefs) ); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)cSize); + + DISPLAYLEVEL(3, "Decompress with block checksum : "); + { size_t iSize = cSize; + size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH; + LZ4F_decompressionContext_t dctx; + CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) ); + CHECK( LZ4F_decompress(dctx, decodedBuffer, &decodedSize, compressedBuffer, &iSize, NULL) ); + if (decodedSize != testSize) goto _output_error; + if (iSize != cSize) goto _output_error; + { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 1); + U64 const crcSrc = XXH64(CNBuffer, testSize, 1); + if (crcDest != crcSrc) goto _output_error; + } + DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize); + + CHECK( LZ4F_freeDecompressionContext(dctx) ); + } + + /* frame content size tests */ + { size_t cErr; + BYTE* const ostart = (BYTE*)compressedBuffer; + BYTE* op = ostart; + CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) ); + + DISPLAYLEVEL(3, "compress without frameSize : "); + memset(&(prefs.frameInfo), 0, sizeof(prefs.frameInfo)); + CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs)); + op += cErr; + CHECK_V(cErr, LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL)); + op += cErr; + CHECK( LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL) ); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)(op-ostart)); + + DISPLAYLEVEL(3, "compress with frameSize 
: "); + prefs.frameInfo.contentSize = testSize; + op = ostart; + CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs)); + op += cErr; + CHECK_V(cErr, LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL)); + op += cErr; + CHECK( LZ4F_compressEnd(cctx, compressedBuffer, testSize, NULL) ); + DISPLAYLEVEL(3, "Compressed %i bytes into a %i bytes frame \n", (int)testSize, (int)(op-ostart)); + + DISPLAYLEVEL(3, "compress with wrong frameSize : "); + prefs.frameInfo.contentSize = testSize+1; + op = ostart; + CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs)); + op += cErr; + CHECK_V(cErr, LZ4F_compressUpdate(cctx, op, LZ4F_compressBound(testSize, &prefs), CNBuffer, testSize, NULL)); + op += cErr; + cErr = LZ4F_compressEnd(cctx, op, testSize, NULL); + if (!LZ4F_isError(cErr)) goto _output_error; + DISPLAYLEVEL(3, "Error correctly detected : %s \n", LZ4F_getErrorName(cErr)); + + CHECK( LZ4F_freeCompressionContext(cctx) ); + cctx = NULL; + } + + /* dictID tests */ + { size_t cErr; + U32 const dictID = 0x99; + CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) ); + + DISPLAYLEVEL(3, "insert a dictID : "); + memset(&prefs.frameInfo, 0, sizeof(prefs.frameInfo)); + prefs.frameInfo.dictID = dictID; + CHECK_V(cErr, LZ4F_compressBegin(cctx, compressedBuffer, testSize, &prefs)); + DISPLAYLEVEL(3, "created frame header of size %i bytes \n", (int)cErr); + + DISPLAYLEVEL(3, "read a dictID : "); + CHECK( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) ); + memset(&prefs.frameInfo, 0, sizeof(prefs.frameInfo)); + CHECK( LZ4F_getFrameInfo(dCtx, &prefs.frameInfo, compressedBuffer, &cErr) ); + if (prefs.frameInfo.dictID != dictID) goto _output_error; + DISPLAYLEVEL(3, "%u \n", (U32)prefs.frameInfo.dictID); + + CHECK( LZ4F_freeDecompressionContext(dCtx) ); dCtx = NULL; + CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL; + } + + /* Dictionary compression test */ + { size_t const dictSize = 
63 KB; + size_t const dstCapacity = LZ4F_compressFrameBound(dictSize, NULL); + size_t cSizeNoDict, cSizeWithDict; + LZ4F_CDict* const cdict = LZ4F_createCDict(CNBuffer, dictSize); + if (cdict == NULL) goto _output_error; + CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) ); + + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with NULL dict : "); + CHECK_V(cSizeNoDict, + LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity, + CNBuffer, dictSize, + NULL, NULL) ); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeNoDict); + + CHECK( LZ4F_freeCompressionContext(cctx) ); + CHECK( LZ4F_createCompressionContext(&cctx, LZ4F_VERSION) ); + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict : "); + CHECK_V(cSizeWithDict, + LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity, + CNBuffer, dictSize, + cdict, NULL) ); + DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n", + (unsigned)dictSize, (unsigned)cSizeWithDict); + if ((LZ4_DISTANCE_MAX > dictSize) && (cSizeWithDict >= cSizeNoDict)) goto _output_error; /* must be more efficient */ + crcOrig = XXH64(CNBuffer, dictSize, 0); + + DISPLAYLEVEL(3, "LZ4F_decompress_usingDict : "); + { LZ4F_dctx* dctx; + size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH; + size_t compressedSize = cSizeWithDict; + CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) ); + CHECK( LZ4F_decompress_usingDict(dctx, + decodedBuffer, &decodedSize, + compressedBuffer, &compressedSize, + CNBuffer, dictSize, + NULL) ); + if (compressedSize != cSizeWithDict) goto _output_error; + if (decodedSize != dictSize) goto _output_error; + { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0); + if (crcDest != crcOrig) goto _output_error; } + DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize); + CHECK( LZ4F_freeDecompressionContext(dctx) ); + } + + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict, negative level : "); + { size_t cSizeLevelMax; + LZ4F_preferences_t cParams; + memset(&cParams, 0, 
sizeof(cParams)); + cParams.compressionLevel = -3; + CHECK_V(cSizeLevelMax, + LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity, + CNBuffer, dictSize, + cdict, &cParams) ); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax); + } + + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, with dict, level max : "); + { size_t cSizeLevelMax; + LZ4F_preferences_t cParams; + memset(&cParams, 0, sizeof(cParams)); + cParams.compressionLevel = LZ4F_compressionLevel_max(); + CHECK_V(cSizeLevelMax, + LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, dstCapacity, + CNBuffer, dictSize, + cdict, &cParams) ); + DISPLAYLEVEL(3, "%u bytes \n", (unsigned)cSizeLevelMax); + } + + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, multiple linked blocks : "); + { size_t cSizeContiguous; + size_t const inSize = dictSize * 3; + size_t const outCapacity = LZ4F_compressFrameBound(inSize, NULL); + LZ4F_preferences_t cParams; + memset(&cParams, 0, sizeof(cParams)); + cParams.frameInfo.blockMode = LZ4F_blockLinked; + cParams.frameInfo.blockSizeID = LZ4F_max64KB; + CHECK_V(cSizeContiguous, + LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, outCapacity, + CNBuffer, inSize, + cdict, &cParams) ); + DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n", + (unsigned)inSize, (unsigned)cSizeContiguous); + + DISPLAYLEVEL(3, "LZ4F_decompress_usingDict on multiple linked blocks : "); + { LZ4F_dctx* dctx; + size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH; + size_t compressedSize = cSizeContiguous; + CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) ); + CHECK( LZ4F_decompress_usingDict(dctx, + decodedBuffer, &decodedSize, + compressedBuffer, &compressedSize, + CNBuffer, dictSize, + NULL) ); + if (compressedSize != cSizeContiguous) goto _output_error; + if (decodedSize != inSize) goto _output_error; + crcOrig = XXH64(CNBuffer, inSize, 0); + { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0); + if (crcDest != crcOrig) goto _output_error; } + DISPLAYLEVEL(3, 
"Regenerated %u bytes \n", (U32)decodedSize); + CHECK( LZ4F_freeDecompressionContext(dctx) ); + } + } + + + DISPLAYLEVEL(3, "LZ4F_compressFrame_usingCDict, multiple independent blocks : "); + { size_t cSizeIndep; + size_t const inSize = dictSize * 3; + size_t const outCapacity = LZ4F_compressFrameBound(inSize, NULL); + LZ4F_preferences_t cParams; + memset(&cParams, 0, sizeof(cParams)); + cParams.frameInfo.blockMode = LZ4F_blockIndependent; + cParams.frameInfo.blockSizeID = LZ4F_max64KB; + CHECK_V(cSizeIndep, + LZ4F_compressFrame_usingCDict(cctx, compressedBuffer, outCapacity, + CNBuffer, inSize, + cdict, &cParams) ); + DISPLAYLEVEL(3, "compressed %u bytes into %u bytes \n", + (unsigned)inSize, (unsigned)cSizeIndep); + + DISPLAYLEVEL(3, "LZ4F_decompress_usingDict on multiple independent blocks : "); + { LZ4F_dctx* dctx; + size_t decodedSize = COMPRESSIBLE_NOISE_LENGTH; + size_t compressedSize = cSizeIndep; + CHECK( LZ4F_createDecompressionContext(&dctx, LZ4F_VERSION) ); + CHECK( LZ4F_decompress_usingDict(dctx, + decodedBuffer, &decodedSize, + compressedBuffer, &compressedSize, + CNBuffer, dictSize, + NULL) ); + if (compressedSize != cSizeIndep) goto _output_error; + if (decodedSize != inSize) goto _output_error; + crcOrig = XXH64(CNBuffer, inSize, 0); + { U64 const crcDest = XXH64(decodedBuffer, decodedSize, 0); + if (crcDest != crcOrig) goto _output_error; } + DISPLAYLEVEL(3, "Regenerated %u bytes \n", (U32)decodedSize); + CHECK( LZ4F_freeDecompressionContext(dctx) ); + } + } + + LZ4F_freeCDict(cdict); + CHECK( LZ4F_freeCompressionContext(cctx) ); cctx = NULL; + } + + DISPLAYLEVEL(3, "getBlockSize test: \n"); + { size_t result; + unsigned blockSizeID; + for (blockSizeID = 4; blockSizeID < 8; ++blockSizeID) { + result = LZ4F_getBlockSize(blockSizeID); + CHECK(result); + DISPLAYLEVEL(3, "Returned block size of %u bytes for blockID %u \n", + (unsigned)result, blockSizeID); + } + + /* Test an invalid input that's too large */ + result = LZ4F_getBlockSize(8); + 
if(!LZ4F_isError(result) || + LZ4F_getErrorCode(result) != LZ4F_ERROR_maxBlockSize_invalid) + goto _output_error; + + /* Test an invalid input that's too small */ + result = LZ4F_getBlockSize(3); + if(!LZ4F_isError(result) || + LZ4F_getErrorCode(result) != LZ4F_ERROR_maxBlockSize_invalid) + goto _output_error; + } + + + DISPLAYLEVEL(3, "Skippable frame test : \n"); + { size_t decodedBufferSize = COMPRESSIBLE_NOISE_LENGTH; + unsigned maxBits = FUZ_highbit((U32)decodedBufferSize); + BYTE* op = (BYTE*)decodedBuffer; + BYTE* const oend = (BYTE*)decodedBuffer + COMPRESSIBLE_NOISE_LENGTH; + BYTE* ip = (BYTE*)compressedBuffer; + BYTE* iend = (BYTE*)compressedBuffer + cSize + 8; + + CHECK( LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION) ); + + /* generate skippable frame */ + FUZ_writeLE32(ip, LZ4F_MAGIC_SKIPPABLE_START); + FUZ_writeLE32(ip+4, (U32)cSize); + + DISPLAYLEVEL(3, "random segment sizes : \n"); + while (ip < iend) { + unsigned nbBits = FUZ_rand(&randState) % maxBits; + size_t iSize = (FUZ_rand(&randState) & ((1< (size_t)(iend-ip)) iSize = (size_t)(iend-ip); + CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) ); + op += oSize; + ip += iSize; + } + DISPLAYLEVEL(3, "Skipped %i bytes \n", (int)decodedBufferSize); + + /* generate zero-size skippable frame */ + DISPLAYLEVEL(3, "zero-size skippable frame\n"); + ip = (BYTE*)compressedBuffer; + op = (BYTE*)decodedBuffer; + FUZ_writeLE32(ip, LZ4F_MAGIC_SKIPPABLE_START+1); + FUZ_writeLE32(ip+4, 0); + iend = ip+8; + + while (ip < iend) { + unsigned const nbBits = FUZ_rand(&randState) % maxBits; + size_t iSize = (FUZ_rand(&randState) & ((1< (size_t)(iend-ip)) iSize = (size_t)(iend-ip); + CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) ); + op += oSize; + ip += iSize; + } + DISPLAYLEVEL(3, "Skipped %i bytes \n", (int)(ip - (BYTE*)compressedBuffer - 8)); + + DISPLAYLEVEL(3, "Skippable frame header complete in first call \n"); + ip = (BYTE*)compressedBuffer; + op = (BYTE*)decodedBuffer; + 
FUZ_writeLE32(ip, LZ4F_MAGIC_SKIPPABLE_START+2); + FUZ_writeLE32(ip+4, 10); + iend = ip+18; + while (ip < iend) { + size_t iSize = 10; + size_t oSize = 10; + if (iSize > (size_t)(iend-ip)) iSize = (size_t)(iend-ip); + CHECK( LZ4F_decompress(dCtx, op, &oSize, ip, &iSize, NULL) ); + op += oSize; + ip += iSize; + } + DISPLAYLEVEL(3, "Skipped %i bytes \n", (int)(ip - (BYTE*)compressedBuffer - 8)); + } + + DISPLAY("Basic tests completed \n"); +/* Common exit path : releases the three buffers and both contexts, then returns basicTests_error (set to 1 only by the _output_error path below). */ _end: + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + LZ4F_freeDecompressionContext(dCtx); dCtx = NULL; + LZ4F_freeCompressionContext(cctx); cctx = NULL; + return basicTests_error; + +_output_error: + basicTests_error = 1; + DISPLAY("Error detected ! \n"); + goto _end; +} + + +typedef enum { o_contiguous, o_noncontiguous, o_overwrite } o_scenario_e; + +/* locateBuffDiff() : diagnostic helper, active only when displayLevel >= 2. For o_contiguous it scans buff1 and buff2 over size bytes and prints the first position where they differ, together with both byte values; it prints nothing more when the buffers are identical. For any other scenario it only reports that it cannot search and returns. */ static void locateBuffDiff(const void* buff1, const void* buff2, size_t size, o_scenario_e o_scenario) +{ + if (displayLevel >= 2) { + size_t p=0; + const BYTE* b1=(const BYTE*)buff1; + const BYTE* b2=(const BYTE*)buff2; + DISPLAY("locateBuffDiff: looking for error position \n"); + if (o_scenario != o_contiguous) { + DISPLAY("mode %i: non-contiguous output (%u bytes), cannot search \n", + (int)o_scenario, (unsigned)size); + return; + } + while (p < size && b1[p]==b2[p]) p++; + if (p != size) { + DISPLAY("Error at pos %i/%i : %02X != %02X \n", (int)p, (int)size, b1[p], b2[p]); + } + } +} + +/* EXIT_MSG : prints the given message followed by seed and testNb (both must be in scope at the call site), then terminates the process with status 1. */ # define EXIT_MSG(...) { DISPLAY("Error => "); DISPLAY(__VA_ARGS__); \ + DISPLAY(" (seed %u, test nb %u) \n", seed, testNb); exit(1); } +# undef CHECK +# define CHECK(cond, ...) 
{ if (cond) { EXIT_MSG(__VA_ARGS__); } } + + +size_t test_lz4f_decompression_wBuffers( + const void* cSrc, size_t cSize, + void* dst, size_t dstCapacity, o_scenario_e o_scenario, + const void* srcRef, size_t decompressedSize, + U64 crcOrig, + U32* const randState, + LZ4F_dctx* const dCtx, + U32 seed, U32 testNb, + int findErrorPos) +{ + const BYTE* ip = (const BYTE*)cSrc; + const BYTE* const iend = ip + cSize; + + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstCapacity; + + unsigned const suggestedBits = FUZ_highbit((U32)cSize); + unsigned const maxBits = MAX(3, suggestedBits); + size_t totalOut = 0; + size_t moreToFlush = 0; + XXH64_state_t xxh64; + XXH64_reset(&xxh64, 1); + assert(ip < iend); + while (ip < iend) { + unsigned const nbBitsI = (FUZ_rand(randState) % (maxBits-1)) + 1; + unsigned const nbBitsO = (FUZ_rand(randState) % (maxBits)) + 1; + size_t const iSizeCand = (FUZ_rand(randState) & ((1< 2x4MB to test large blocks */ + void* CNBuffer = NULL; + size_t const compressedBufferSize = LZ4F_compressFrameBound(CNBufferLength, NULL) + 4 MB; /* needs some margin */ + void* compressedBuffer = NULL; + void* decodedBuffer = NULL; + U32 coreRand = seed; + LZ4F_decompressionContext_t dCtx = NULL; + LZ4F_decompressionContext_t dCtxNoise = NULL; + LZ4F_compressionContext_t cCtx = NULL; + clock_t const startClock = clock(); + clock_t const clockDuration = duration_s * CLOCKS_PER_SEC; + + /* Create buffers */ + { size_t const creationStatus = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION); + CHECK(LZ4F_isError(creationStatus), "Allocation failed (error %i)", (int)creationStatus); } + { size_t const creationStatus = LZ4F_createDecompressionContext(&dCtxNoise, LZ4F_VERSION); + CHECK(LZ4F_isError(creationStatus), "Allocation failed (error %i)", (int)creationStatus); } + { size_t const creationStatus = LZ4F_createCompressionContext(&cCtx, LZ4F_VERSION); + CHECK(LZ4F_isError(creationStatus), "Allocation failed (error %i)", (int)creationStatus); } + CNBuffer = 
malloc(CNBufferLength); + CHECK(CNBuffer==NULL, "CNBuffer Allocation failed"); + compressedBuffer = malloc(compressedBufferSize); + CHECK(compressedBuffer==NULL, "compressedBuffer Allocation failed"); + decodedBuffer = calloc(1, CNBufferLength); /* calloc avoids decodedBuffer being considered "garbage" by scan-build */ + CHECK(decodedBuffer==NULL, "decodedBuffer Allocation failed"); + FUZ_fillCompressibleNoiseBuffer(CNBuffer, CNBufferLength, compressibility, &coreRand); + + /* jump to requested testNb */ + for (testNb =0; (testNb < startTest); testNb++) (void)FUZ_rand(&coreRand); /* sync randomizer */ + + /* main fuzzer test loop */ + for ( ; (testNb < nbTests) || (clockDuration > FUZ_GetClockSpan(startClock)) ; testNb++) { + U32 randState = coreRand ^ prime1; + unsigned const srcBits = (FUZ_rand(&randState) % (FUZ_highbit((U32)(CNBufferLength-1)) - 1)) + 1; + size_t const srcSize = (FUZ_rand(&randState) & ((1< frameInfo.blockChecksumFlag) { + U32 const bc32 = XXH32(op, 0, 0); + op[0] = (BYTE)bc32; /* little endian format */ + op[1] = (BYTE)(bc32>>8); + op[2] = (BYTE)(bc32>>16); + op[3] = (BYTE)(bc32>>24); + op += 4; + } } } } + } /* while (ip =oend, "LZ4F_compressFrameBound overflow"); + { size_t const dstEndSafeSize = LZ4F_compressBound(0, prefsPtr); + int const tooSmallDstEnd = ((FUZ_rand(&randState) & 31) == 3); + size_t const dstEndTooSmallSize = (FUZ_rand(&randState) % dstEndSafeSize) + 1; + size_t const dstEndSize = tooSmallDstEnd ? 
dstEndTooSmallSize : dstEndSafeSize; + BYTE const canaryByte = (BYTE)(FUZ_rand(&randState) & 255); + size_t flushedSize; + DISPLAYLEVEL(7,"canaryByte at pos %u / %u \n", + (unsigned)((size_t)(op - (BYTE*)compressedBuffer) + dstEndSize), + (unsigned)compressedBufferSize); + assert(op + dstEndSize < (BYTE*)compressedBuffer + compressedBufferSize); + op[dstEndSize] = canaryByte; + flushedSize = LZ4F_compressEnd(cCtx, op, dstEndSize, &cOptions); + CHECK(op[dstEndSize] != canaryByte, "LZ4F_compressEnd writes beyond dstCapacity !"); + if (LZ4F_isError(flushedSize)) { + if (tooSmallDstEnd) /* failure is allowed */ continue; + CHECK(!tooSmallDstEnd, "Compression completion failed (error %i : %s)", + (int)flushedSize, LZ4F_getErrorName(flushedSize)); + } + op += flushedSize; + } + cSize = (size_t)(op - (BYTE*)compressedBuffer); + DISPLAYLEVEL(5, "\nCompressed %u bytes into %u \n", (U32)srcSize, (U32)cSize); + } + + + /* multi-segments decompression */ + DISPLAYLEVEL(6, "normal decompression \n"); + { size_t result = test_lz4f_decompression(compressedBuffer, cSize, srcStart, srcSize, crcOrig, &randState, dCtx, seed, testNb, 1 /*findError*/ ); + CHECK (LZ4F_isError(result), "multi-segment decompression failed (error %i => %s)", + (int)result, LZ4F_getErrorName(result)); + } + +#if 1 + /* insert noise into src */ + { U32 const maxNbBits = FUZ_highbit((U32)cSize); + size_t pos = 0; + for (;;) { + /* keep some original src */ + { U32 const nbBits = FUZ_rand(&randState) % maxNbBits; + size_t const mask = (1< = cSize) break; + /* add noise */ + { U32 const nbBitsCodes = FUZ_rand(&randState) % maxNbBits; + U32 const nbBits = nbBitsCodes ? 
nbBitsCodes-1 : 0; + size_t const mask = (1< ='0') && (*argument<='9')) { + nbTests *= 10; + nbTests += (unsigned)(*argument - '0'); + argument++; + } + break; + + case 'T': + argument++; + nbTests = 0; duration = 0; + for (;;) { + switch(*argument) + { + case 'm': duration *= 60; argument++; continue; + case 's': + case 'n': argument++; continue; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': duration *= 10; duration += (U32)(*argument++ - '0'); continue; + } + break; + } + break; + + case 's': + argument++; + seed=0; + seedset=1; + while ((*argument>='0') && (*argument<='9')) { + seed *= 10; + seed += (U32)(*argument - '0'); + argument++; + } + break; + case 't': + argument++; + testNb=0; + while ((*argument>='0') && (*argument<='9')) { + testNb *= 10; + testNb += (unsigned)(*argument - '0'); + argument++; + } + break; + case 'P': /* compressibility % */ + argument++; + proba=0; + while ((*argument>='0') && (*argument<='9')) { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba<0) proba=0; + if (proba>100) proba=100; + break; + default: + ; + return FUZ_usage(programName); + } + } + } + } + + /* Get Seed */ + DISPLAY("Starting lz4frame tester (%i-bits, %s)\n", (int)(sizeof(size_t)*8), LZ4_VERSION_STRING); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + DISPLAY("Seed = %u\n", seed); + if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) DISPLAY("Compressibility : %i%%\n", proba); + + nbTests += (nbTests==0); /* avoid zero */ + + if (testNb==0) result = basicTests(seed, ((double)proba) / 100); + if (result) return 1; + return fuzzerTests(seed, nbTests, testNb, ((double)proba) / 100, duration); +} diff --git a/tests/fullbench.c b/tests/fullbench.c new file mode 100644 index 0000000..cb9b684 --- /dev/null +++ b/tests/fullbench.c @@ -0,0 +1,869 @@ +/* + bench.c - Demo program to benchmark open-source compression 
algorithm + Copyright (C) Yann Collet 2012-2016 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + You can contact the author at : + - LZ4 source repository : https://github.com/lz4/lz4 + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + + +#if defined(_MSC_VER) || defined(_WIN32) + /* S_ISREG & gettimeofday() are not supported by MSVC */ +# define BMK_LEGACY_TIMER 1 +#endif + + +/************************************** +* Includes +**************************************/ +#include "platform.h" /* _CRT_SECURE_NO_WARNINGS, Large Files support */ +#include "util.h" /* U32, UTIL_getFileSize */ +#include /* malloc, free */ +#include /* fprintf, fopen, ftello */ +#include /* stat64 */ +#include /* stat64 */ +#include /* strcmp */ +#include /* clock_t, clock(), CLOCKS_PER_SEC */ + +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* LZ4_decompress_fast */ +#include "lz4.h" +#include "lz4hc.h" +#include "lz4frame.h" + +#include "xxhash.h" + + +/************************************** +* Constants +**************************************/ +#define PROGRAM_DESCRIPTION "LZ4 speed analyzer" +#define AUTHOR "Yann Collet" +#define WELCOME_MESSAGE "*** %s v%s %i-bits, by %s ***\n", PROGRAM_DESCRIPTION, LZ4_VERSION_STRING, (int)(sizeof(void*)*8), AUTHOR + +#define NBLOOPS 6 +#define TIMELOOP (CLOCKS_PER_SEC 
* 25 / 10) + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define KNUTH 2654435761U +#define MAX_MEM (1920 MB) +#define DEFAULT_CHUNKSIZE (4 MB) + +#define ALL_COMPRESSORS 0 +#define ALL_DECOMPRESSORS 0 + + +/************************************** +* Local structures +**************************************/ +struct chunkParameters +{ + U32 id; + char* origBuffer; + char* compressedBuffer; + int origSize; + int compressedSize; +}; + + +/************************************** +* Macros +**************************************/ +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define PROGRESS(...) g_noPrompt ? 0 : DISPLAY(__VA_ARGS__) + + +/************************************** +* Benchmark Parameters +**************************************/ +static int g_chunkSize = DEFAULT_CHUNKSIZE; +static int g_nbIterations = NBLOOPS; +static int g_pause = 0; +static int g_compressionTest = 1; +static int g_compressionAlgo = ALL_COMPRESSORS; +static int g_decompressionTest = 1; +static int g_decompressionAlgo = ALL_DECOMPRESSORS; +static int g_noPrompt = 0; + +static void BMK_setBlocksize(int bsize) +{ + g_chunkSize = bsize; + DISPLAY("-Using Block Size of %i KB-\n", g_chunkSize>>10); +} + +static void BMK_setNbIterations(int nbLoops) +{ + g_nbIterations = nbLoops; + DISPLAY("- %i iterations -\n", g_nbIterations); +} + +static void BMK_setPause(void) +{ + g_pause = 1; +} + + +/********************************************************* +* Private functions +*********************************************************/ +static clock_t BMK_GetClockSpan( clock_t clockStart ) +{ + return clock() - clockStart; /* works even if overflow; max span ~30 mn */ +} + + +static size_t BMK_findMaxMem(U64 requiredMem) +{ + size_t step = 64 MB; + BYTE* testmem = NULL; + + requiredMem = (((requiredMem >> 26) + 1) << 26); + requiredMem += 2*step; + if (requiredMem > MAX_MEM) requiredMem = MAX_MEM; + + while (!testmem) { + if (requiredMem > step) requiredMem -= step; + else 
requiredMem >>= 1; + testmem = (BYTE*) malloc ((size_t)requiredMem); + } + free (testmem); + + /* keep some space available */ + if (requiredMem > step) requiredMem -= step; + else requiredMem >>= 1; + + return (size_t)requiredMem; +} + + +/********************************************************* +* Memory management, to test LZ4_USER_MEMORY_FUNCTIONS +*********************************************************/ +void* LZ4_malloc(size_t s) { return malloc(s); } +void* LZ4_calloc(size_t n, size_t s) { return calloc(n,s); } +void LZ4_free(void* p) { free(p); } + + +/********************************************************* +* Benchmark function +*********************************************************/ +static LZ4_stream_t LZ4_stream; +static void local_LZ4_resetDictT(void) +{ + void* const r = LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream)); + assert(r != NULL); (void)r; +} + +static void local_LZ4_createStream(void) +{ + void* const r = LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream)); + assert(r != NULL); (void)r; +} + +static int local_LZ4_saveDict(const char* in, char* out, int inSize) +{ + (void)in; + return LZ4_saveDict(&LZ4_stream, out, inSize); +} + +static int local_LZ4_compress_default_large(const char* in, char* out, int inSize) +{ + return LZ4_compress_default(in, out, inSize, LZ4_compressBound(inSize)); +} + +static int local_LZ4_compress_default_small(const char* in, char* out, int inSize) +{ + return LZ4_compress_default(in, out, inSize, LZ4_compressBound(inSize)-1); +} + +static int local_LZ4_compress_destSize(const char* in, char* out, int inSize) +{ + return LZ4_compress_destSize(in, out, &inSize, LZ4_compressBound(inSize)-1); +} + +static int local_LZ4_compress_fast0(const char* in, char* out, int inSize) +{ + return LZ4_compress_fast(in, out, inSize, LZ4_compressBound(inSize), 0); +} + +static int local_LZ4_compress_fast1(const char* in, char* out, int inSize) +{ + return LZ4_compress_fast(in, out, inSize, LZ4_compressBound(inSize), 1); +} + 
+static int local_LZ4_compress_fast2(const char* in, char* out, int inSize) +{ + return LZ4_compress_fast(in, out, inSize, LZ4_compressBound(inSize), 2); +} + +static int local_LZ4_compress_fast17(const char* in, char* out, int inSize) +{ + return LZ4_compress_fast(in, out, inSize, LZ4_compressBound(inSize), 17); +} + +static int local_LZ4_compress_fast_extState0(const char* in, char* out, int inSize) +{ + return LZ4_compress_fast_extState(&LZ4_stream, in, out, inSize, LZ4_compressBound(inSize), 0); +} + +static int local_LZ4_compress_fast_continue0(const char* in, char* out, int inSize) +{ + return LZ4_compress_fast_continue(&LZ4_stream, in, out, inSize, LZ4_compressBound(inSize), 0); +} + +#ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + +/* declare hidden function */ +extern int LZ4_compress_forceExtDict (LZ4_stream_t* LZ4_stream, const char* source, char* dest, int inputSize); + +#if defined (__cplusplus) +} +#endif + +static int local_LZ4_compress_forceDict(const char* in, char* out, int inSize) +{ + return LZ4_compress_forceExtDict(&LZ4_stream, in, out, inSize); +} +#endif + + +/* HC compression functions */ +LZ4_streamHC_t LZ4_streamHC; +static void local_LZ4_resetStreamHC(void) +{ + LZ4_initStreamHC(&LZ4_streamHC, sizeof(LZ4_streamHC)); +} + +static int local_LZ4_saveDictHC(const char* in, char* out, int inSize) +{ + (void)in; + return LZ4_saveDictHC(&LZ4_streamHC, out, inSize); +} + +static int local_LZ4_compress_HC(const char* in, char* out, int inSize) +{ + return LZ4_compress_HC(in, out, inSize, LZ4_compressBound(inSize), 9); +} + +static int local_LZ4_compress_HC_extStateHC(const char* in, char* out, int inSize) +{ + return LZ4_compress_HC_extStateHC(&LZ4_streamHC, in, out, inSize, LZ4_compressBound(inSize), 9); +} + +static int local_LZ4_compress_HC_continue(const char* in, char* out, int inSize) +{ + return LZ4_compress_HC_continue(&LZ4_streamHC, in, out, inSize, LZ4_compressBound(inSize)); +} + + +/* decompression functions 
*/ +static int local_LZ4_decompress_fast(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_fast(in, out, outSize); + return outSize; +} + +static int local_LZ4_decompress_fast_usingDict_prefix(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65536); + return outSize; +} + +static int local_LZ4_decompress_fast_usingExtDict(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_fast_usingDict(in, out, outSize, out - 65536, 65535); + return outSize; +} + +static int local_LZ4_decompress_safe_withPrefix64k(const char* in, char* out, int inSize, int outSize) +{ + LZ4_decompress_safe_withPrefix64k(in, out, inSize, outSize); + return outSize; +} + +static int local_LZ4_decompress_safe_usingDict(const char* in, char* out, int inSize, int outSize) +{ + LZ4_decompress_safe_usingDict(in, out, inSize, outSize, out - 65536, 65536); + return outSize; +} + +#ifndef LZ4_DLL_IMPORT +#if defined (__cplusplus) +extern "C" { +#endif + +extern int LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize, const void* dict, size_t dictSize); + +#if defined (__cplusplus) +} +#endif + +static int local_LZ4_decompress_safe_forceExtDict(const char* in, char* out, int inSize, int outSize) +{ + (void)inSize; + LZ4_decompress_safe_forceExtDict(in, out, inSize, outSize, out - 65536, 65536); + return outSize; +} +#endif + +static int local_LZ4_decompress_safe_partial(const char* in, char* out, int inSize, int outSize) +{ + int result = LZ4_decompress_safe_partial(in, out, inSize, outSize - 5, outSize); + if (result < 0) return result; + return outSize; +} + + +/* frame functions */ +static int local_LZ4F_compressFrame(const char* in, char* out, int inSize) +{ + assert(inSize >= 0); + return (int)LZ4F_compressFrame(out, LZ4F_compressFrameBound((size_t)inSize, NULL), in, (size_t)inSize, NULL); +} + +static 
LZ4F_decompressionContext_t g_dCtx; + +static int local_LZ4F_decompress(const char* in, char* out, int inSize, int outSize) +{ + size_t srcSize = (size_t)inSize; + size_t dstSize = (size_t)outSize; + size_t result; + assert(inSize >= 0); + assert(outSize >= 0); + result = LZ4F_decompress(g_dCtx, out, &dstSize, in, &srcSize, NULL); + if (result!=0) { DISPLAY("Error decompressing frame : unfinished frame \n"); exit(8); } + if (srcSize != (size_t)inSize) { DISPLAY("Error decompressing frame : read size incorrect \n"); exit(9); } + return (int)dstSize; +} + +static int local_LZ4F_decompress_followHint(const char* src, char* dst, int srcSize, int dstSize) +{ + size_t totalInSize = (size_t)srcSize; + size_t maxOutSize = (size_t)dstSize; + + size_t inPos = 0; + size_t inSize = 0; + size_t outPos = 0; + size_t outRemaining = maxOutSize - outPos; + + for (;;) { + size_t const sizeHint = LZ4F_decompress(g_dCtx, dst+outPos, &outRemaining, src+inPos, &inSize, NULL); + assert(!LZ4F_isError(sizeHint)); + + inPos += inSize; + inSize = sizeHint; + + outPos += outRemaining; + outRemaining = maxOutSize - outPos; + + if (!sizeHint) break; + } + + /* frame completed */ + if (inPos != totalInSize) { + DISPLAY("Error decompressing frame : must read (%u) full frame (%u) \n", + (unsigned)inPos, (unsigned)totalInSize); + exit(10); + } + return (int)outPos; + +} + +/* always provide input by block of 64 KB */ +static int local_LZ4F_decompress_noHint(const char* src, char* dst, int srcSize, int dstSize) +{ + size_t totalInSize = (size_t)srcSize; + size_t maxOutSize = (size_t)dstSize; + + size_t inPos = 0; + size_t inSize = 64 KB; + size_t outPos = 0; + size_t outRemaining = maxOutSize - outPos; + + for (;;) { + size_t const sizeHint = LZ4F_decompress(g_dCtx, dst+outPos, &outRemaining, src+inPos, &inSize, NULL); + assert(!LZ4F_isError(sizeHint)); + + inPos += inSize; + inSize = (inPos + 64 KB <= totalInSize) ? 
64 KB : totalInSize - inPos; + + outPos += outRemaining; + outRemaining = maxOutSize - outPos; + + if (!sizeHint) break; + } + + /* frame completed */ + if (inPos != totalInSize) { + DISPLAY("Error decompressing frame : must read (%u) full frame (%u) \n", + (unsigned)inPos, (unsigned)totalInSize); + exit(10); + } + return (int)outPos; + +} + +#define NB_COMPRESSION_ALGORITHMS 100 +#define NB_DECOMPRESSION_ALGORITHMS 100 +int fullSpeedBench(const char** fileNamesTable, int nbFiles) +{ + int fileIdx=0; + + /* Init */ + { size_t const errorCode = LZ4F_createDecompressionContext(&g_dCtx, LZ4F_VERSION); + if (LZ4F_isError(errorCode)) { DISPLAY("dctx allocation issue \n"); return 10; } } + + /* Loop for each fileName */ + while (fileIdx inFileSize) benchedSize = (size_t)inFileSize; + if (benchedSize < inFileSize) { + DISPLAY("Not enough memory for '%s' full size; testing %i MB only... \n", + inFileName, (int)(benchedSize>>20)); + } + + /* Allocation */ + chunkP = (struct chunkParameters*) malloc(((benchedSize / (size_t)g_chunkSize)+1) * sizeof(struct chunkParameters)); + orig_buff = (char*) malloc(benchedSize); + nbChunks = (int) ((benchedSize + (size_t)g_chunkSize - 1) / (size_t)g_chunkSize); + maxCompressedChunkSize = LZ4_compressBound(g_chunkSize); + compressedBuffSize = nbChunks * maxCompressedChunkSize; + compressed_buff = (char*)malloc((size_t)compressedBuffSize); + if(!chunkP || !orig_buff || !compressed_buff) { + DISPLAY("\nError: not enough memory! \n"); + fclose(inFile); + free(orig_buff); + free(compressed_buff); + free(chunkP); + return(12); + } + + /* Fill in src buffer */ + DISPLAY("Loading %s... \r", inFileName); + readSize = fread(orig_buff, 1, benchedSize, inFile); + fclose(inFile); + + if (readSize != benchedSize) { + DISPLAY("\nError: problem reading file '%s' !! 
\n", inFileName); + free(orig_buff); + free(compressed_buff); + free(chunkP); + return 13; + } + + /* Calculating input Checksum */ + crcOriginal = XXH32(orig_buff, benchedSize,0); + + + /* Bench */ + { int loopNb, nb_loops, chunkNb, cAlgNb, dAlgNb; + size_t cSize=0; + double ratio=0.; + + DISPLAY("\r%79s\r", ""); + DISPLAY(" %s : \n", inFileName); + + /* Bench Compression Algorithms */ + for (cAlgNb=0; (cAlgNb <= NB_COMPRESSION_ALGORITHMS) && (g_compressionTest); cAlgNb++) { + const char* compressorName; + int (*compressionFunction)(const char*, char*, int); + void (*initFunction)(void) = NULL; + double bestTime = 100000000.; + + /* filter compressionAlgo only */ + if ((g_compressionAlgo != ALL_COMPRESSORS) && (g_compressionAlgo != cAlgNb)) continue; + + /* Init data chunks */ + { int i; + size_t remaining = benchedSize; + char* in = orig_buff; + char* out = compressed_buff; + assert(nbChunks >= 1); + for (i=0; i 0); + if (remaining > (size_t)g_chunkSize) { + chunkP[i].origSize = g_chunkSize; + remaining -= (size_t)g_chunkSize; + } else { + chunkP[i].origSize = (int)remaining; + remaining = 0; + } + chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize; + chunkP[i].compressedSize = 0; + } + } + + switch(cAlgNb) + { + case 0 : DISPLAY("Compression functions : \n"); continue; + case 1 : compressionFunction = local_LZ4_compress_default_large; compressorName = "LZ4_compress_default"; break; + case 2 : compressionFunction = local_LZ4_compress_default_small; compressorName = "LZ4_compress_default(small dst)"; break; + case 3 : compressionFunction = local_LZ4_compress_destSize; compressorName = "LZ4_compress_destSize"; break; + case 4 : compressionFunction = local_LZ4_compress_fast0; compressorName = "LZ4_compress_fast(0)"; break; + case 5 : compressionFunction = local_LZ4_compress_fast1; compressorName = "LZ4_compress_fast(1)"; break; + case 6 : compressionFunction = local_LZ4_compress_fast2; compressorName = "LZ4_compress_fast(2)"; break; + case 7 : 
compressionFunction = local_LZ4_compress_fast17; compressorName = "LZ4_compress_fast(17)"; break; + case 8 : compressionFunction = local_LZ4_compress_fast_extState0; compressorName = "LZ4_compress_fast_extState(0)"; break; + case 9 : compressionFunction = local_LZ4_compress_fast_continue0; initFunction = local_LZ4_createStream; compressorName = "LZ4_compress_fast_continue(0)"; break; + + case 10: compressionFunction = local_LZ4_compress_HC; compressorName = "LZ4_compress_HC"; break; + case 12: compressionFunction = local_LZ4_compress_HC_extStateHC; compressorName = "LZ4_compress_HC_extStateHC"; break; + case 14: compressionFunction = local_LZ4_compress_HC_continue; initFunction = local_LZ4_resetStreamHC; compressorName = "LZ4_compress_HC_continue"; break; +#ifndef LZ4_DLL_IMPORT + case 20: compressionFunction = local_LZ4_compress_forceDict; initFunction = local_LZ4_resetDictT; compressorName = "LZ4_compress_forceDict"; break; +#endif + case 30: compressionFunction = local_LZ4F_compressFrame; compressorName = "LZ4F_compressFrame"; + chunkP[0].origSize = (int)benchedSize; nbChunks=1; + break; + case 40: compressionFunction = local_LZ4_saveDict; compressorName = "LZ4_saveDict"; + if (chunkP[0].origSize < 8) { DISPLAY(" cannot bench %s with less then 8 bytes \n", compressorName); continue; } + LZ4_loadDict(&LZ4_stream, chunkP[0].origBuffer, chunkP[0].origSize); + break; + case 41: compressionFunction = local_LZ4_saveDictHC; compressorName = "LZ4_saveDictHC"; + if (chunkP[0].origSize < 8) { DISPLAY(" cannot bench %s with less then 8 bytes \n", compressorName); continue; } + LZ4_loadDictHC(&LZ4_streamHC, chunkP[0].origBuffer, chunkP[0].origSize); + break; + default : + continue; /* unknown ID : just skip */ + } + + for (loopNb = 1; loopNb <= g_nbIterations; loopNb++) { + double averageTime; + clock_t clockTime; + + PROGRESS("%2i-%-34.34s :%10i ->\r", loopNb, compressorName, (int)benchedSize); + { size_t i; for (i=0; i %9i (%5.2f%%),%7.1f MB/s\r", loopNb, compressorName, 
(int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000); + } + + if (ratio<100.) + DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.2f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 1000000); + else + DISPLAY("%2i-%-34.34s :%10i ->%9i (%5.1f%%),%7.1f MB/s\n", cAlgNb, compressorName, (int)benchedSize, (int)cSize, ratio, (double)benchedSize / bestTime / 100000); + } + + /* Prepare layout for decompression */ + /* Init data chunks */ + { int i; + size_t remaining = benchedSize; + char* in = orig_buff; + char* out = compressed_buff; + + nbChunks = (int) (((int)benchedSize + (g_chunkSize-1))/ g_chunkSize); + for (i=0; i g_chunkSize) { + chunkP[i].origSize = g_chunkSize; + remaining -= (size_t)g_chunkSize; + } else { + chunkP[i].origSize = (int)remaining; + remaining = 0; + } + chunkP[i].compressedBuffer = out; out += maxCompressedChunkSize; + chunkP[i].compressedSize = 0; + } + } + for (chunkNb=0; chunkNb \r", loopNb, dName, (int)benchedSize); + + nb_loops = 0; + clockTime = clock(); + while(clock() == clockTime); + clockTime = clock(); + while(BMK_GetClockSpan(clockTime) < TIMELOOP) { + for (chunkNb=0; chunkNb %7.1f MB/s\r", loopNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); + + /* CRC Checking */ + crcDecoded = XXH32(orig_buff, benchedSize, 0); + if (checkResult && (crcOriginal!=crcDecoded)) { + DISPLAY("\n!!! WARNING !!! %14s : Invalid Checksum : %x != %x\n", + inFileName, (unsigned)crcOriginal, (unsigned)crcDecoded); + exit(1); + } } + + DISPLAY("%2i-%-34.34s :%10i -> %7.1f MB/s\n", dAlgNb, dName, (int)benchedSize, (double)benchedSize / bestTime / 1000000); + } + } + free(orig_buff); + free(compressed_buff); + free(chunkP); + } + + LZ4F_freeDecompressionContext(g_dCtx); + if (g_pause) { printf("press enter...\n"); (void)getchar(); } + + return 0; +} + + +static int usage(const char* exename) +{ + DISPLAY( "Usage :\n"); + DISPLAY( " %s [arg] file1 file2 ... 
fileX\n", exename); + DISPLAY( "Arguments :\n"); + DISPLAY( " -c : compression tests only\n"); + DISPLAY( " -d : decompression tests only\n"); + DISPLAY( " -H/-h : Help (this text + advanced options)\n"); + return 0; +} + +static int usage_advanced(void) +{ + DISPLAY( "\nAdvanced options :\n"); + DISPLAY( " -c# : test only compression function # [1-%i]\n", NB_COMPRESSION_ALGORITHMS); + DISPLAY( " -d# : test only decompression function # [1-%i]\n", NB_DECOMPRESSION_ALGORITHMS); + DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS); + DISPLAY( " -B# : Block size [4-7](default : 7)\n"); + return 0; +} + +static int badusage(const char* exename) +{ + DISPLAY("Wrong parameters\n"); + usage(exename); + return 0; +} + +int main(int argc, const char** argv) +{ + int i, + filenamesStart=2; + const char* exename = argv[0]; + const char* input_filename=0; + + // Welcome message + DISPLAY(WELCOME_MESSAGE); + + if (argc<2) { badusage(exename); return 1; } + + for(i=1; i = '0') && (argument[1]<= '9')) { + g_compressionAlgo *= 10; + g_compressionAlgo += argument[1] - '0'; + argument++; + } + break; + + // Select decompression algorithm only + case 'd': + g_compressionTest = 0; + while ((argument[1]>= '0') && (argument[1]<= '9')) { + g_decompressionAlgo *= 10; + g_decompressionAlgo += argument[1] - '0'; + argument++; + } + break; + + // Display help on usage + case 'h' : + case 'H': usage(exename); usage_advanced(); return 0; + + // Modify Block Properties + case 'B': + while (argument[1]!=0) + switch(argument[1]) + { + case '4': + case '5': + case '6': + case '7': + { int B = argument[1] - '0'; + int S = 1 << (8 + 2*B); + BMK_setBlocksize(S); + argument++; + break; + } + case 'D': argument++; break; + default : goto _exit_blockProperties; + } +_exit_blockProperties: + break; + + // Modify Nb Iterations + case 'i': + if ((argument[1] >='0') && (argument[1] <='9')) { + int iters = argument[1] - '0'; + BMK_setNbIterations(iters); + argument++; + } + break; + + // Pause 
at the end (hidden option) + case 'p': BMK_setPause(); break; + + // Unknown command + default : badusage(exename); return 1; + } + } + continue; + } + + // first provided filename is input + if (!input_filename) { input_filename=argument; filenamesStart=i; continue; } + + } + + // No input filename ==> Error + if(!input_filename) { badusage(exename); return 1; } + + return fullSpeedBench(argv+filenamesStart, argc-filenamesStart); + +} diff --git a/tests/fuzzer.c b/tests/fuzzer.c new file mode 100644 index 0000000..a824813 --- /dev/null +++ b/tests/fuzzer.c @@ -0,0 +1,1841 @@ +/* + fuzzer.c - Fuzzer test tool for LZ4 + Copyright (C) Yann Collet 2012-2017 + + GPL v2 License + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ + You can contact the author at : + - LZ4 homepage : http://www.lz4.org + - LZ4 source repo : https://github.com/lz4/lz4 +*/ + +/*-************************************ +* Compiler options +**************************************/ +#ifdef _MSC_VER /* Visual Studio */ +# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ +# pragma warning(disable : 4146) /* disable: C4146: minus unsigned expression */ +# pragma warning(disable : 4310) /* disable: C4310: constant char value > 127 */ +#endif + + +/*-************************************ +* Dependencies +**************************************/ +#if defined(__unix__) && !defined(_AIX) /* must be included before platform.h for MAP_ANONYMOUS */ +# undef _GNU_SOURCE /* in case it's already defined */ +# define _GNU_SOURCE /* MAP_ANONYMOUS even in -std=c99 mode */ +# include /* mmap */ +#endif +#include "platform.h" /* _CRT_SECURE_NO_WARNINGS */ +#include "util.h" /* U32 */ +#include +#include /* fgets, sscanf */ +#include /* strcmp */ +#include /* clock_t, clock, CLOCKS_PER_SEC */ +#include +#include /* INT_MAX */ + +#if defined(_AIX) +# include /* mmap */ +#endif + +#define LZ4_DISABLE_DEPRECATE_WARNINGS /* LZ4_decompress_fast */ +#define LZ4_STATIC_LINKING_ONLY +#include "lz4.h" +#define LZ4_HC_STATIC_LINKING_ONLY +#include "lz4hc.h" +#define XXH_STATIC_LINKING_ONLY +#include "xxhash.h" + + +/*-************************************ +* Basic Types +**************************************/ +#if !defined(__cplusplus) && !(defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) +typedef size_t uintptr_t; /* true on most systems, except OpenVMS-64 (which doesn't need address overflow test) */ +#endif + + +/*-************************************ +* Constants +**************************************/ +#define NB_ATTEMPTS (1<<16) +#define COMPRESSIBLE_NOISE_LENGTH (1 << 21) +#define FUZ_MAX_BLOCK_SIZE (1 << 17) +#define FUZ_MAX_DICT_SIZE (1 << 15) +#define 
FUZ_COMPRESSIBILITY_DEFAULT 60 +#define PRIME1 2654435761U +#define PRIME2 2246822519U +#define PRIME3 3266489917U + +#define KB *(1U<<10) +#define MB *(1U<<20) +#define GB *(1U<<30) + + +/*-*************************************** +* Macros +*****************************************/ +#define DISPLAY(...) fprintf(stdout, __VA_ARGS__) +#define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } +static int g_displayLevel = 2; + +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + + +/*-******************************************************* +* Fuzzer functions +*********************************************************/ +static clock_t FUZ_GetClockSpan(clock_t clockStart) +{ + return clock() - clockStart; /* works even if overflow; max span ~ 30mn */ +} + +static void FUZ_displayUpdate(unsigned testNb) +{ + static clock_t g_time = 0; + static const clock_t g_refreshRate = CLOCKS_PER_SEC / 5; + if ((FUZ_GetClockSpan(g_time) > g_refreshRate) || (g_displayLevel>=4)) { + g_time = clock(); + DISPLAY("\r%5u ", testNb); + fflush(stdout); + } +} + +static U32 FUZ_rotl32(U32 u32, U32 nbBits) +{ + return ((u32 << nbBits) | (u32 >> (32 - nbBits))); +} + +static U32 FUZ_highbit32(U32 v32) +{ + unsigned nbBits = 0; + if (v32==0) return 0; + while (v32) { v32 >>= 1; nbBits++; } + return nbBits; +} + +static U32 FUZ_rand(U32* src) +{ + U32 rand32 = *src; + rand32 *= PRIME1; + rand32 ^= PRIME2; + rand32 = FUZ_rotl32(rand32, 13); + *src = rand32; + return rand32; +} + + +#define FUZ_RAND15BITS ((FUZ_rand(seed) >> 3) & 32767) +#define FUZ_RANDLENGTH ( ((FUZ_rand(seed) >> 7) & 3) ? 
(FUZ_rand(seed) % 15) : (FUZ_rand(seed) % 510) + 15) +static void FUZ_fillCompressibleNoiseBuffer(void* buffer, size_t bufferSize, double proba, U32* seed) +{ + BYTE* const BBuffer = (BYTE*)buffer; + size_t pos = 0; + U32 const P32 = (U32)(32768 * proba); + + /* First Bytes */ + while (pos < 20) + BBuffer[pos++] = (BYTE)(FUZ_rand(seed)); + + while (pos < bufferSize) { + /* Select : Literal (noise) or copy (within 64K) */ + if (FUZ_RAND15BITS < P32) { + /* Copy (within 64K) */ + size_t const length = (size_t)FUZ_RANDLENGTH + 4; + size_t const d = MIN(pos+length, bufferSize); + size_t match; + size_t offset = (size_t)FUZ_RAND15BITS + 1; + while (offset > pos) offset >>= 1; + match = pos - offset; + while (pos < d) BBuffer[pos++] = BBuffer[match++]; + } else { + /* Literal (noise) */ + size_t const length = FUZ_RANDLENGTH; + size_t const d = MIN(pos+length, bufferSize); + while (pos < d) BBuffer[pos++] = (BYTE)(FUZ_rand(seed) >> 5); + } + } +} + + +#define MAX_NB_BUFF_I134 150 +#define BLOCKSIZE_I134 (32 MB) +/*! FUZ_AddressOverflow() : +* Aggressively pushes memory allocation limits, +* and generates patterns which create address space overflow. 
+* only possible in 32-bits mode */ +static int FUZ_AddressOverflow(void) +{ + char* buffers[MAX_NB_BUFF_I134+1]; + int nbBuff=0; + int highAddress = 0; + + DISPLAY("Overflow tests : "); + + /* Only possible in 32-bits */ + if (sizeof(void*)==8) { + DISPLAY("64 bits mode : no overflow \n"); + fflush(stdout); + return 0; + } + + buffers[0] = (char*)malloc(BLOCKSIZE_I134); + buffers[1] = (char*)malloc(BLOCKSIZE_I134); + if ((!buffers[0]) || (!buffers[1])) { + free(buffers[0]); free(buffers[1]); + DISPLAY("not enough memory for tests \n"); + return 0; + } + + for (nbBuff=2; nbBuff < MAX_NB_BUFF_I134; nbBuff++) { + DISPLAY("%3i \b\b\b\b", nbBuff); fflush(stdout); + buffers[nbBuff] = (char*)malloc(BLOCKSIZE_I134); + if (buffers[nbBuff]==NULL) goto _endOfTests; + + if (((uintptr_t)buffers[nbBuff] > (uintptr_t)0x80000000) && (!highAddress)) { + DISPLAY("high address detected : "); + fflush(stdout); + highAddress=1; + } + + { size_t const sizeToGenerateOverflow = (size_t)(- ((uintptr_t)buffers[nbBuff-1]) + 512); + int const nbOf255 = (int)((sizeToGenerateOverflow / 255) + 1); + char* const input = buffers[nbBuff-1]; + char* output = buffers[nbBuff]; + int r; + input[0] = (char)0xF0; /* Literal length overflow */ + input[1] = (char)0xFF; + input[2] = (char)0xFF; + input[3] = (char)0xFF; + { int u; for(u = 4; u <= nbOf255+4; u++) input[u] = (char)0xff; } + r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); + if (r>0) { DISPLAY("LZ4_decompress_safe = %i \n", r); goto _overflowError; } + input[0] = (char)0x1F; /* Match length overflow */ + input[1] = (char)0x01; + input[2] = (char)0x01; + input[3] = (char)0x00; + r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); + if (r>0) { DISPLAY("LZ4_decompress_safe = %i \n", r); goto _overflowError; } + + output = buffers[nbBuff-2]; /* Reverse in/out pointer order */ + input[0] = (char)0xF0; /* Literal length overflow */ + input[1] = (char)0xFF; + input[2] = (char)0xFF; + input[3] = (char)0xFF; + r = 
LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); + if (r>0) goto _overflowError; + input[0] = (char)0x1F; /* Match length overflow */ + input[1] = (char)0x01; + input[2] = (char)0x01; + input[3] = (char)0x00; + r = LZ4_decompress_safe(input, output, nbOf255+64, BLOCKSIZE_I134); + if (r>0) goto _overflowError; + } + } + + nbBuff++; +_endOfTests: + { int i; for (i=0 ; i =4) { \ + printf("\r%4u - %2u :", cycleNb, testNb); \ + printf(" " __VA_ARGS__); \ + printf(" "); \ + fflush(stdout); \ + } } + + + /* init */ + if(!CNBuffer || !compressedBuffer || !decodedBuffer || !LZ4dictHC) { + DISPLAY("Not enough memory to start fuzzer tests"); + exit(1); + } + if ( LZ4_initStream(&LZ4dictBody, sizeof(LZ4dictBody)) == NULL) abort(); + { U32 randState = coreRandState ^ PRIME3; + FUZ_fillCompressibleNoiseBuffer(CNBuffer, COMPRESSIBLE_NOISE_LENGTH, compressibility, &randState); + } + + /* move to startCycle */ + for (cycleNb = 0; cycleNb < startCycle; cycleNb++) + (void) FUZ_rand(&coreRandState); /* sync coreRandState */ + + /* Main test loop */ + for (cycleNb = startCycle; + (cycleNb < nbCycles) || (FUZ_GetClockSpan(clockStart) < clockDuration); + cycleNb++) { + U32 testNb = 0; + U32 randState = FUZ_rand(&coreRandState) ^ PRIME3; + int const blockSize = (FUZ_rand(&randState) % (FUZ_MAX_BLOCK_SIZE-1)) + 1; + int const blockStart = (int)(FUZ_rand(&randState) % (U32)(COMPRESSIBLE_NOISE_LENGTH - blockSize - 1)) + 1; + int const dictSizeRand = FUZ_rand(&randState) % FUZ_MAX_DICT_SIZE; + int const dictSize = MIN(dictSizeRand, blockStart - 1); + int const compressionLevel = FUZ_rand(&randState) % (LZ4HC_CLEVEL_MAX+1); + const char* block = ((char*)CNBuffer) + blockStart; + const char* dict = block - dictSize; + int compressedSize, HCcompressedSize; + int blockContinueCompressedSize; + U32 const crcOrig = XXH32(block, (size_t)blockSize, 0); + int ret; + + FUZ_displayUpdate(cycleNb); + + /* Compression tests */ + if ( ((FUZ_rand(&randState) & 63) == 2) + && 
((size_t)blockSize < labSize) ) { + memcpy(lowAddrBuffer, block, blockSize); + block = (const char*)lowAddrBuffer; + } + + /* Test compression destSize */ + FUZ_DISPLAYTEST("test LZ4_compress_destSize()"); + { int cSize, srcSize = blockSize; + int const targetSize = srcSize * (int)((FUZ_rand(&randState) & 127)+1) >> 7; + char const endCheck = (char)(FUZ_rand(&randState) & 255); + compressedBuffer[targetSize] = endCheck; + cSize = LZ4_compress_destSize(block, compressedBuffer, &srcSize, targetSize); + FUZ_CHECKTEST(cSize > targetSize, "LZ4_compress_destSize() result larger than dst buffer !"); + FUZ_CHECKTEST(compressedBuffer[targetSize] != endCheck, "LZ4_compress_destSize() overwrite dst buffer !"); + FUZ_CHECKTEST(srcSize > blockSize, "LZ4_compress_destSize() read more than src buffer !"); + DISPLAYLEVEL(5, "destSize : %7i/%7i; content%7i/%7i ", cSize, targetSize, srcSize, blockSize); + if (targetSize>0) { + /* check correctness */ + U32 const crcBase = XXH32(block, (size_t)srcSize, 0); + char const canary = (char)(FUZ_rand(&randState) & 255); + FUZ_CHECKTEST((cSize==0), "LZ4_compress_destSize() compression failed"); + FUZ_DISPLAYTEST(); + decodedBuffer[srcSize] = canary; + { int const dSize = LZ4_decompress_safe(compressedBuffer, decodedBuffer, cSize, srcSize); + FUZ_CHECKTEST(dSize<0, "LZ4_decompress_safe() failed on data compressed by LZ4_compress_destSize"); + FUZ_CHECKTEST(dSize!=srcSize, "LZ4_decompress_safe() failed : did not fully decompressed data"); + } + FUZ_CHECKTEST(decodedBuffer[srcSize] != canary, "LZ4_decompress_safe() overwrite dst buffer !"); + { U32 const crcDec = XXH32(decodedBuffer, (size_t)srcSize, 0); + FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data"); + } } + DISPLAYLEVEL(5, " OK \n"); + } + + /* Test compression HC destSize */ + FUZ_DISPLAYTEST("test LZ4_compress_HC_destSize()"); + { int cSize, srcSize = blockSize; + int const targetSize = srcSize * (int)((FUZ_rand(&randState) & 127)+1) >> 7; + char const 
endCheck = (char)(FUZ_rand(&randState) & 255); + void* const ctx = LZ4_createHC(block); + FUZ_CHECKTEST(ctx==NULL, "LZ4_createHC() allocation failed"); + compressedBuffer[targetSize] = endCheck; + cSize = LZ4_compress_HC_destSize(ctx, block, compressedBuffer, &srcSize, targetSize, compressionLevel); + DISPLAYLEVEL(5, "LZ4_compress_HC_destSize(%i): destSize : %7i/%7i; content%7i/%7i ", + compressionLevel, cSize, targetSize, srcSize, blockSize); + LZ4_freeHC(ctx); + FUZ_CHECKTEST(cSize > targetSize, "LZ4_compress_HC_destSize() result larger than dst buffer !"); + FUZ_CHECKTEST(compressedBuffer[targetSize] != endCheck, "LZ4_compress_HC_destSize() overwrite dst buffer !"); + FUZ_CHECKTEST(srcSize > blockSize, "LZ4_compress_HC_destSize() fed more than src buffer !"); + if (targetSize>0) { + /* check correctness */ + U32 const crcBase = XXH32(block, (size_t)srcSize, 0); + char const canary = (char)(FUZ_rand(&randState) & 255); + FUZ_CHECKTEST((cSize==0), "LZ4_compress_HC_destSize() compression failed"); + FUZ_DISPLAYTEST(); + decodedBuffer[srcSize] = canary; + { int const dSize = LZ4_decompress_safe(compressedBuffer, decodedBuffer, cSize, srcSize); + FUZ_CHECKTEST(dSize<0, "LZ4_decompress_safe failed (%i) on data compressed by LZ4_compressHC_destSize", dSize); + FUZ_CHECKTEST(dSize!=srcSize, "LZ4_decompress_safe failed : decompressed %i bytes, was supposed to decompress %i bytes", dSize, srcSize); + } + FUZ_CHECKTEST(decodedBuffer[srcSize] != canary, "LZ4_decompress_safe overwrite dst buffer !"); + { U32 const crcDec = XXH32(decodedBuffer, (size_t)srcSize, 0); + FUZ_CHECKTEST(crcDec!=crcBase, "LZ4_decompress_safe() corrupted decoded data"); + } } + DISPLAYLEVEL(5, " OK \n"); + } + + /* Test compression HC */ + FUZ_DISPLAYTEST("test LZ4_compress_HC()"); + HCcompressedSize = LZ4_compress_HC(block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel); + FUZ_CHECKTEST(HCcompressedSize==0, "LZ4_compress_HC() failed"); + + /* Test compression HC using 
external state */ + FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC()"); + { int const r = LZ4_compress_HC_extStateHC(stateLZ4HC, block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel); + FUZ_CHECKTEST(r==0, "LZ4_compress_HC_extStateHC() failed") + } + + /* Test compression HC using fast reset external state */ + FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC_fastReset()"); + { int const r = LZ4_compress_HC_extStateHC_fastReset(stateLZ4HC, block, compressedBuffer, blockSize, (int)compressedBufferSize, compressionLevel); + FUZ_CHECKTEST(r==0, "LZ4_compress_HC_extStateHC_fastReset() failed"); + } + + /* Test compression using external state */ + FUZ_DISPLAYTEST("test LZ4_compress_fast_extState()"); + { int const r = LZ4_compress_fast_extState(stateLZ4, block, compressedBuffer, blockSize, (int)compressedBufferSize, 8); + FUZ_CHECKTEST(r==0, "LZ4_compress_fast_extState() failed"); } + + /* Test compression using fast reset external state*/ + FUZ_DISPLAYTEST(); + { int const r = LZ4_compress_fast_extState_fastReset(stateLZ4, block, compressedBuffer, blockSize, (int)compressedBufferSize, 8); + FUZ_CHECKTEST(r==0, "LZ4_compress_fast_extState_fastReset() failed"); } + + /* Test compression */ + FUZ_DISPLAYTEST("test LZ4_compress_default()"); + compressedSize = LZ4_compress_default(block, compressedBuffer, blockSize, (int)compressedBufferSize); + FUZ_CHECKTEST(compressedSize<=0, "LZ4_compress_default() failed"); + + /* Decompression tests */ + + /* Test decompress_fast() with input buffer size exactly correct => must not read out of bound */ + { char* const cBuffer_exact = (char*)malloc((size_t)compressedSize); + assert(cBuffer_exact != NULL); + assert(compressedSize <= (int)compressedBufferSize); + memcpy(cBuffer_exact, compressedBuffer, compressedSize); + + /* Test decoding with output size exactly correct => must work */ + FUZ_DISPLAYTEST("LZ4_decompress_fast() with exact output buffer"); + { int const r = LZ4_decompress_fast(cBuffer_exact, 
decodedBuffer, blockSize); + FUZ_CHECKTEST(r<0, "LZ4_decompress_fast failed despite correct space"); + FUZ_CHECKTEST(r!=compressedSize, "LZ4_decompress_fast failed : did not fully read compressed data"); + } + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_fast corrupted decoded data"); + } + + /* Test decoding with one byte missing => must fail */ + FUZ_DISPLAYTEST("LZ4_decompress_fast() with output buffer 1-byte too short"); + decodedBuffer[blockSize-1] = 0; + { int const r = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize-1); + FUZ_CHECKTEST(r>=0, "LZ4_decompress_fast should have failed, due to Output Size being too small"); + } + FUZ_CHECKTEST(decodedBuffer[blockSize-1]!=0, "LZ4_decompress_fast overrun specified output buffer"); + + /* Test decoding with one byte too much => must fail */ + FUZ_DISPLAYTEST(); + { int const r = LZ4_decompress_fast(cBuffer_exact, decodedBuffer, blockSize+1); + FUZ_CHECKTEST(r>=0, "LZ4_decompress_fast should have failed, due to Output Size being too large"); + } + + /* Test decoding with output size exactly what's necessary => must work */ + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + { int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, blockSize); + FUZ_CHECKTEST(r<0, "LZ4_decompress_safe failed despite sufficient space"); + FUZ_CHECKTEST(r!=blockSize, "LZ4_decompress_safe did not regenerate original data"); + } + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data"); + } + + /* Test decoding with more than enough output size => must work */ + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + decodedBuffer[blockSize+1] = 0; + { int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, 
blockSize+1); + FUZ_CHECKTEST(r<0, "LZ4_decompress_safe failed despite amply sufficient space"); + FUZ_CHECKTEST(r!=blockSize, "LZ4_decompress_safe did not regenerate original data"); + } + FUZ_CHECKTEST(decodedBuffer[blockSize+1], "LZ4_decompress_safe overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe corrupted decoded data"); + } + + /* Test decoding with output size being one byte too short => must fail */ + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize-1] = 0; + { int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, blockSize-1); + FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to Output Size being one byte too short"); + } + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe overrun specified output buffer size"); + + /* Test decoding with output size being 10 bytes too short => must fail */ + FUZ_DISPLAYTEST(); + if (blockSize>10) { + decodedBuffer[blockSize-10] = 0; + { int const r = LZ4_decompress_safe(cBuffer_exact, decodedBuffer, compressedSize, blockSize-10); + FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to Output Size being 10 bytes too short"); + } + FUZ_CHECKTEST(decodedBuffer[blockSize-10], "LZ4_decompress_safe overrun specified output buffer size"); + } + + /* noisy src decompression test */ + + /* insert noise into src */ + { U32 const maxNbBits = FUZ_highbit32((U32)compressedSize); + size_t pos = 0; + for (;;) { + /* keep some original src */ + { U32 const nbBits = FUZ_rand(&randState) % maxNbBits; + size_t const mask = (1< = (size_t)compressedSize) break; + /* add noise */ + { U32 const nbBitsCodes = FUZ_rand(&randState) % maxNbBits; + U32 const nbBits = nbBitsCodes ? 
nbBitsCodes-1 : 0; + size_t const mask = (1< blockSize, "LZ4_decompress_safe on noisy src : result is too large : %u > %u (dst buffer)", (unsigned)decompressResult, (unsigned)blockSize); + } + { U32 endCheck; memcpy(&endCheck, decodedBuffer+blockSize, sizeof(endCheck)); + FUZ_CHECKTEST(endMark!=endCheck, "LZ4_decompress_safe on noisy src : dst buffer overflow"); + } } /* noisy src decompression test */ + + free(cBuffer_exact); + } + + /* Test decoding with input size being one byte too short => must fail */ + FUZ_DISPLAYTEST(); + { int const r = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize-1, blockSize); + FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to input size being one byte too short (blockSize=%i, result=%i, compressedSize=%i)", blockSize, r, compressedSize); + } + + /* Test decoding with input size being one byte too large => must fail */ + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + { int const r = LZ4_decompress_safe(compressedBuffer, decodedBuffer, compressedSize+1, blockSize); + FUZ_CHECKTEST(r>=0, "LZ4_decompress_safe should have failed, due to input size being too large"); + } + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe overrun specified output buffer size"); + + /* Test partial decoding => must work */ + FUZ_DISPLAYTEST("test LZ4_decompress_safe_partial"); + { size_t const missingOutBytes = FUZ_rand(&randState) % (unsigned)blockSize; + int const targetSize = (int)((size_t)blockSize - missingOutBytes); + size_t const extraneousInBytes = FUZ_rand(&randState) % 2; + int const inCSize = (int)((size_t)compressedSize + extraneousInBytes); + char const sentinel = decodedBuffer[targetSize] = block[targetSize] ^ 0x5A; + int const decResult = LZ4_decompress_safe_partial(compressedBuffer, decodedBuffer, inCSize, targetSize, blockSize); + FUZ_CHECKTEST(decResult<0, "LZ4_decompress_safe_partial failed despite valid input data (error:%i)", decResult); + FUZ_CHECKTEST(decResult != targetSize, 
"LZ4_decompress_safe_partial did not regenerated required amount of data (%i < %i <= %i)", decResult, targetSize, blockSize); + FUZ_CHECKTEST(decodedBuffer[targetSize] != sentinel, "LZ4_decompress_safe_partial overwrite beyond requested size (though %i <= %i <= %i)", decResult, targetSize, blockSize); + FUZ_CHECKTEST(memcmp(block, decodedBuffer, (size_t)targetSize), "LZ4_decompress_safe_partial: corruption detected in regenerated data"); + } + + /* Test Compression with limited output size */ + + /* Test compression with output size being exactly what's necessary (should work) */ + FUZ_DISPLAYTEST("test LZ4_compress_default() with output buffer just the right size"); + ret = LZ4_compress_default(block, compressedBuffer, blockSize, compressedSize); + FUZ_CHECKTEST(ret==0, "LZ4_compress_default() failed despite sufficient space"); + + /* Test compression with output size being exactly what's necessary and external state (should work) */ + FUZ_DISPLAYTEST("test LZ4_compress_fast_extState() with output buffer just the right size"); + ret = LZ4_compress_fast_extState(stateLZ4, block, compressedBuffer, blockSize, compressedSize, 1); + FUZ_CHECKTEST(ret==0, "LZ4_compress_fast_extState() failed despite sufficient space"); + + /* Test HC compression with output size being exactly what's necessary (should work) */ + FUZ_DISPLAYTEST("test LZ4_compress_HC() with output buffer just the right size"); + ret = LZ4_compress_HC(block, compressedBuffer, blockSize, HCcompressedSize, compressionLevel); + FUZ_CHECKTEST(ret==0, "LZ4_compress_HC() failed despite sufficient space"); + + /* Test HC compression with output size being exactly what's necessary (should work) */ + FUZ_DISPLAYTEST("test LZ4_compress_HC_extStateHC() with output buffer just the right size"); + ret = LZ4_compress_HC_extStateHC(stateLZ4HC, block, compressedBuffer, blockSize, HCcompressedSize, compressionLevel); + FUZ_CHECKTEST(ret==0, "LZ4_compress_HC_extStateHC() failed despite sufficient space"); + + /* Test 
compression with missing bytes into output buffer => must fail */ + FUZ_DISPLAYTEST("test LZ4_compress_default() with output buffer a bit too short"); + { int missingBytes = (FUZ_rand(&randState) % 0x3F) + 1; + if (missingBytes >= compressedSize) missingBytes = compressedSize-1; + missingBytes += !missingBytes; /* avoid special case missingBytes==0 */ + compressedBuffer[compressedSize-missingBytes] = 0; + { int const cSize = LZ4_compress_default(block, compressedBuffer, blockSize, compressedSize-missingBytes); + FUZ_CHECKTEST(cSize, "LZ4_compress_default should have failed (output buffer too small by %i byte)", missingBytes); + } + FUZ_CHECKTEST(compressedBuffer[compressedSize-missingBytes], "LZ4_compress_default overran output buffer ! (%i missingBytes)", missingBytes) + } + + /* Test HC compression with missing bytes into output buffer => must fail */ + FUZ_DISPLAYTEST("test LZ4_compress_HC() with output buffer a bit too short"); + { int missingBytes = (FUZ_rand(&randState) % 0x3F) + 1; + if (missingBytes >= HCcompressedSize) missingBytes = HCcompressedSize-1; + missingBytes += !missingBytes; /* avoid special case missingBytes==0 */ + compressedBuffer[HCcompressedSize-missingBytes] = 0; + { int const hcSize = LZ4_compress_HC(block, compressedBuffer, blockSize, HCcompressedSize-missingBytes, compressionLevel); + FUZ_CHECKTEST(hcSize, "LZ4_compress_HC should have failed (output buffer too small by %i byte)", missingBytes); + } + FUZ_CHECKTEST(compressedBuffer[HCcompressedSize-missingBytes], "LZ4_compress_HC overran output buffer ! 
(%i missingBytes)", missingBytes) + } + + + /*-******************/ + /* Dictionary tests */ + /*-******************/ + + /* Compress using dictionary */ + FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary of size %i", dictSize); + { LZ4_stream_t LZ4_stream; + LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream)); + LZ4_compress_fast_continue (&LZ4_stream, dict, compressedBuffer, dictSize, (int)compressedBufferSize, 1); /* Just to fill hash tables */ + blockContinueCompressedSize = LZ4_compress_fast_continue (&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1); + FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue failed"); + } + + /* Decompress with dictionary as prefix */ + FUZ_DISPLAYTEST("test LZ4_decompress_fast_usingDict() with dictionary as prefix"); + memcpy(decodedBuffer, dict, dictSize); + ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer+dictSize, blockSize, decodedBuffer, dictSize); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input"); + { U32 const crcCheck = XXH32(decodedBuffer+dictSize, (size_t)blockSize, 0); + if (crcCheck!=crcOrig) { + FUZ_findDiff(block, decodedBuffer); + EXIT_MSG("LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize); + } } + + FUZ_DISPLAYTEST("test LZ4_decompress_safe_usingDict()"); + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer+dictSize, blockContinueCompressedSize, blockSize, decodedBuffer, dictSize); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data"); + { U32 const crcCheck = XXH32(decodedBuffer+dictSize, (size_t)blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data"); + } + + /* Compress using External dictionary */ + FUZ_DISPLAYTEST("test LZ4_compress_fast_continue(), with non-contiguous dictionary"); + dict -= (size_t)(FUZ_rand(&randState) 
& 0xF) + 1; /* create space, so now dictionary is an ExtDict */ + if (dict < (char*)CNBuffer) dict = (char*)CNBuffer; + LZ4_loadDict(&LZ4dictBody, dict, dictSize); + blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4dictBody, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1); + FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue failed"); + + FUZ_DISPLAYTEST("LZ4_compress_fast_continue() with dictionary and output buffer too short by one byte"); + LZ4_loadDict(&LZ4dictBody, dict, dictSize); + ret = LZ4_compress_fast_continue(&LZ4dictBody, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1); + FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using ExtDict should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize); + + FUZ_DISPLAYTEST("test LZ4_compress_fast_continue() with dictionary loaded with LZ4_loadDict()"); + DISPLAYLEVEL(5, " compress %i bytes from buffer(%p) into dst(%p) using dict(%p) of size %i \n", + blockSize, (const void *)block, (void *)decodedBuffer, (const void *)dict, dictSize); + LZ4_loadDict(&LZ4dictBody, dict, dictSize); + ret = LZ4_compress_fast_continue(&LZ4dictBody, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize); + FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue should work : enough size available within output buffer"); + + /* Decompress with dictionary as external */ + FUZ_DISPLAYTEST("test LZ4_decompress_fast_usingDict() with dictionary as extDict"); + DISPLAYLEVEL(5, " decoding %i bytes from buffer(%p) using dict(%p) of size %i \n", + blockSize, (void *)decodedBuffer, (const void *)dict, dictSize); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize); + 
FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_fast_usingDict overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + if (crcCheck!=crcOrig) { + FUZ_findDiff(block, decodedBuffer); + EXIT_MSG("LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize); + } } + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data"); + } + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize-1, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_fast_usingDict should have failed : wrong original size (-1 byte)"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast_usingDict overrun specified output buffer size"); + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-1, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); + + FUZ_DISPLAYTEST(); + { int const missingBytes = (FUZ_rand(&randState) & 0xF) + 2; + if (blockSize > missingBytes) { + decodedBuffer[blockSize-missingBytes] = 0; + ret = 
LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-missingBytes, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%i byte)", missingBytes); + FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%i byte) (blockSize=%i)", missingBytes, blockSize); + } } + + /* Compress using external dictionary stream */ + { LZ4_stream_t LZ4_stream; + int expectedSize; + U32 expectedCrc; + + FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_loadDict()"); + LZ4_loadDict(&LZ4dictBody, dict, dictSize); + expectedSize = LZ4_compress_fast_continue(&LZ4dictBody, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1); + FUZ_CHECKTEST(expectedSize<=0, "LZ4_compress_fast_continue reference compression for extDictCtx should have succeeded"); + expectedCrc = XXH32(compressedBuffer, (size_t)expectedSize, 0); + + FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary()"); + LZ4_loadDict(&LZ4dictBody, dict, dictSize); + LZ4_initStream(&LZ4_stream, sizeof(LZ4_stream)); + LZ4_attach_dictionary(&LZ4_stream, &LZ4dictBody); + blockContinueCompressedSize = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, (int)compressedBufferSize, 1); + FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_fast_continue using extDictCtx failed"); + + /* In the future, it might be desirable to let extDictCtx mode's + * output diverge from the output generated by regular extDict mode. + * Until that time, this comparison serves as a good regression + * test. 
+ */ + FUZ_CHECKTEST(blockContinueCompressedSize != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output (%d expected vs %d actual)", expectedSize, blockContinueCompressedSize); + FUZ_CHECKTEST(XXH32(compressedBuffer, (size_t)blockContinueCompressedSize, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output"); + + FUZ_DISPLAYTEST("LZ4_compress_fast_continue() after LZ4_attach_dictionary(), but output buffer is 1 byte too short"); + LZ4_resetStream_fast(&LZ4_stream); + LZ4_attach_dictionary(&LZ4_stream, &LZ4dictBody); + ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize-1, 1); + FUZ_CHECKTEST(ret>0, "LZ4_compress_fast_continue using extDictCtx should fail : one missing byte for output buffer : %i written, %i buffer", ret, blockContinueCompressedSize); + /* note : context is no longer dirty after a failed compressed block */ + + FUZ_DISPLAYTEST(); + LZ4_resetStream_fast(&LZ4_stream); + LZ4_attach_dictionary(&LZ4_stream, &LZ4dictBody); + ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize); + FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx should work : enough size available within output buffer"); + FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output"); + FUZ_CHECKTEST(XXH32(compressedBuffer, (size_t)ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output"); + + FUZ_DISPLAYTEST(); + LZ4_resetStream_fast(&LZ4_stream); + LZ4_attach_dictionary(&LZ4_stream, &LZ4dictBody); + ret = LZ4_compress_fast_continue(&LZ4_stream, block, compressedBuffer, blockSize, blockContinueCompressedSize, 1); + 
FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_limitedOutput_compressed size is different (%i != %i)", ret, blockContinueCompressedSize); + FUZ_CHECKTEST(ret<=0, "LZ4_compress_fast_continue using extDictCtx with re-used context should work : enough size available within output buffer"); + FUZ_CHECKTEST(ret != expectedSize, "LZ4_compress_fast_continue using extDictCtx produced different-sized output"); + FUZ_CHECKTEST(XXH32(compressedBuffer, (size_t)ret, 0) != expectedCrc, "LZ4_compress_fast_continue using extDictCtx produced different output"); + } + + /* Decompress with dictionary as external */ + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize, dict, dictSize); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_decompress_fast_usingDict did not read all compressed block input"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_fast_usingDict overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + if (crcCheck!=crcOrig) { + FUZ_findDiff(block, decodedBuffer); + EXIT_MSG("LZ4_decompress_fast_usingDict corrupted decoded data (dict %i)", dictSize); + } } + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + FUZ_CHECKTEST(crcCheck!=crcOrig, "LZ4_decompress_safe_usingDict corrupted decoded data"); + } + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_fast_usingDict(compressedBuffer, decodedBuffer, blockSize-1, dict, dictSize); + FUZ_CHECKTEST(ret>=0, 
"LZ4_decompress_fast_usingDict should have failed : wrong original size (-1 byte)"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_fast_usingDict overrun specified output buffer size"); + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize-1] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-1, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : not enough output size (-1 byte)"); + FUZ_CHECKTEST(decodedBuffer[blockSize-1], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); + + FUZ_DISPLAYTEST("LZ4_decompress_safe_usingDict with a too small output buffer"); + { int const missingBytes = (FUZ_rand(&randState) & 0xF) + 2; + if (blockSize > missingBytes) { + decodedBuffer[blockSize-missingBytes] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize-missingBytes, dict, dictSize); + FUZ_CHECKTEST(ret>=0, "LZ4_decompress_safe_usingDict should have failed : output buffer too small (-%i byte)", missingBytes); + FUZ_CHECKTEST(decodedBuffer[blockSize-missingBytes], "LZ4_decompress_safe_usingDict overrun specified output buffer size (-%i byte) (blockSize=%i)", missingBytes, blockSize); + } } + + /* Compress HC using External dictionary */ + FUZ_DISPLAYTEST("LZ4_compress_HC_continue with an external dictionary"); + dict -= (FUZ_rand(&randState) & 7); /* even bigger separation */ + if (dict < (char*)CNBuffer) dict = (char*)CNBuffer; + LZ4_loadDictHC(LZ4dictHC, dict, dictSize); + LZ4_setCompressionLevel (LZ4dictHC, compressionLevel); + blockContinueCompressedSize = LZ4_compress_HC_continue(LZ4dictHC, block, compressedBuffer, blockSize, (int)compressedBufferSize); + FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue failed"); + FUZ_CHECKTEST(LZ4dictHC->internal_donotuse.dirty, "Context should be clean"); + + FUZ_DISPLAYTEST("LZ4_compress_HC_continue with same external 
dictionary, but output buffer 1 byte too short"); + LZ4_loadDictHC(LZ4dictHC, dict, dictSize); + ret = LZ4_compress_HC_continue(LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1); + FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDict should fail : one missing byte for output buffer (expected %i, but result=%i)", blockContinueCompressedSize, ret); + /* note : context is no longer dirty after a failed compressed block */ + + FUZ_DISPLAYTEST("LZ4_compress_HC_continue with same external dictionary, and output buffer exactly the right size"); + LZ4_loadDictHC(LZ4dictHC, dict, dictSize); + ret = LZ4_compress_HC_continue(LZ4dictHC, block, compressedBuffer, blockSize, blockContinueCompressedSize); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue size is different : ret(%i) != expected(%i)", ret, blockContinueCompressedSize); + FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue should work : enough size available within output buffer"); + FUZ_CHECKTEST(LZ4dictHC->internal_donotuse.dirty, "Context should be clean"); + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + if (crcCheck!=crcOrig) { + FUZ_findDiff(block, decodedBuffer); + EXIT_MSG("LZ4_decompress_safe_usingDict corrupted decoded data"); + } } + + /* Compress HC using external dictionary stream */ + FUZ_DISPLAYTEST(); + { LZ4_streamHC_t* const LZ4_streamHC = LZ4_createStreamHC(); + + LZ4_loadDictHC(LZ4dictHC, dict, dictSize); + LZ4_attach_HC_dictionary(LZ4_streamHC, LZ4dictHC); + LZ4_setCompressionLevel (LZ4_streamHC, compressionLevel); + 
blockContinueCompressedSize = LZ4_compress_HC_continue(LZ4_streamHC, block, compressedBuffer, blockSize, (int)compressedBufferSize); + FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue with ExtDictCtx failed"); + FUZ_CHECKTEST(LZ4_streamHC->internal_donotuse.dirty, "Context should be clean"); + + FUZ_DISPLAYTEST(); + LZ4_resetStreamHC_fast (LZ4_streamHC, compressionLevel); + LZ4_attach_HC_dictionary(LZ4_streamHC, LZ4dictHC); + ret = LZ4_compress_HC_continue(LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize-1); + FUZ_CHECKTEST(ret>0, "LZ4_compress_HC_continue using ExtDictCtx should fail : one missing byte for output buffer (%i != %i)", ret, blockContinueCompressedSize); + /* note : context is no longer dirty after a failed compressed block */ + + FUZ_DISPLAYTEST(); + LZ4_resetStreamHC_fast (LZ4_streamHC, compressionLevel); + LZ4_attach_HC_dictionary(LZ4_streamHC, LZ4dictHC); + ret = LZ4_compress_HC_continue(LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx size is different (%i != %i)", ret, blockContinueCompressedSize); + FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx should work : enough size available within output buffer"); + FUZ_CHECKTEST(LZ4_streamHC->internal_donotuse.dirty, "Context should be clean"); + + FUZ_DISPLAYTEST(); + LZ4_resetStreamHC_fast (LZ4_streamHC, compressionLevel); + LZ4_attach_HC_dictionary(LZ4_streamHC, LZ4dictHC); + ret = LZ4_compress_HC_continue(LZ4_streamHC, block, compressedBuffer, blockSize, blockContinueCompressedSize); + FUZ_CHECKTEST(ret!=blockContinueCompressedSize, "LZ4_compress_HC_continue using ExtDictCtx and fast reset size is different (%i != %i)", + ret, blockContinueCompressedSize); + FUZ_CHECKTEST(ret<=0, "LZ4_compress_HC_continue using ExtDictCtx and fast reset should work : enough size available within output buffer"); + 
FUZ_CHECKTEST(LZ4_streamHC->internal_donotuse.dirty, "Context should be clean"); + + LZ4_freeStreamHC(LZ4_streamHC); + } + + FUZ_DISPLAYTEST(); + decodedBuffer[blockSize] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, blockSize, dict, dictSize); + FUZ_CHECKTEST(ret!=blockSize, "LZ4_decompress_safe_usingDict did not regenerate original data"); + FUZ_CHECKTEST(decodedBuffer[blockSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size"); + { U32 const crcCheck = XXH32(decodedBuffer, (size_t)blockSize, 0); + if (crcCheck!=crcOrig) { + FUZ_findDiff(block, decodedBuffer); + EXIT_MSG("LZ4_decompress_safe_usingDict corrupted decoded data"); + } } + + /* Compress HC continue destSize */ + FUZ_DISPLAYTEST(); + { int const availableSpace = (int)(FUZ_rand(&randState) % (U32)blockSize) + 5; + int consumedSize = blockSize; + FUZ_DISPLAYTEST(); + LZ4_loadDictHC(LZ4dictHC, dict, dictSize); + LZ4_setCompressionLevel(LZ4dictHC, compressionLevel); + blockContinueCompressedSize = LZ4_compress_HC_continue_destSize(LZ4dictHC, block, compressedBuffer, &consumedSize, availableSpace); + DISPLAYLEVEL(5, " LZ4_compress_HC_continue_destSize : compressed %6i/%6i into %6i/%6i at cLevel=%i \n", + consumedSize, blockSize, blockContinueCompressedSize, availableSpace, compressionLevel); + FUZ_CHECKTEST(blockContinueCompressedSize==0, "LZ4_compress_HC_continue_destSize failed"); + FUZ_CHECKTEST(blockContinueCompressedSize > availableSpace, "LZ4_compress_HC_continue_destSize write overflow"); + FUZ_CHECKTEST(consumedSize > blockSize, "LZ4_compress_HC_continue_destSize read overflow"); + + FUZ_DISPLAYTEST(); + decodedBuffer[consumedSize] = 0; + ret = LZ4_decompress_safe_usingDict(compressedBuffer, decodedBuffer, blockContinueCompressedSize, consumedSize, dict, dictSize); + FUZ_CHECKTEST(ret != consumedSize, "LZ4_decompress_safe_usingDict regenerated %i bytes (%i expected)", ret, consumedSize); + 
FUZ_CHECKTEST(decodedBuffer[consumedSize], "LZ4_decompress_safe_usingDict overrun specified output buffer size") + { U32 const crcSrc = XXH32(block, (size_t)consumedSize, 0); + U32 const crcDst = XXH32(decodedBuffer, (size_t)consumedSize, 0); + if (crcSrc!=crcDst) { + FUZ_findDiff(block, decodedBuffer); + EXIT_MSG("LZ4_decompress_safe_usingDict corrupted decoded data"); + } } + } + + /* ***** End of tests *** */ + /* Fill stats */ + assert(blockSize >= 0); + bytes += (unsigned)blockSize; + assert(compressedSize >= 0); + cbytes += (unsigned)compressedSize; + assert(HCcompressedSize >= 0); + hcbytes += (unsigned)HCcompressedSize; + assert(blockContinueCompressedSize >= 0); + ccbytes += (unsigned)blockContinueCompressedSize; + } + + if (nbCycles<=1) nbCycles = cycleNb; /* end by time */ + bytes += !bytes; /* avoid division by 0 */ + printf("\r%7u /%7u - ", cycleNb, nbCycles); + printf("all tests completed successfully \n"); + printf("compression ratio: %0.3f%%\n", (double)cbytes/bytes*100); + printf("HC compression ratio: %0.3f%%\n", (double)hcbytes/bytes*100); + printf("ratio with dict: %0.3f%%\n", (double)ccbytes/bytes*100); + + /* release memory */ + free(CNBuffer); + free(compressedBuffer); + free(decodedBuffer); + FUZ_freeLowAddr(lowAddrBuffer, labSize); + LZ4_freeStreamHC(LZ4dictHC); + free(stateLZ4); + free(stateLZ4HC); + return result; +} + + +#define testInputSize (196 KB) +#define testCompressedSize (130 KB) +#define ringBufferSize (8 KB) + +static void FUZ_unitTests(int compressionLevel) +{ + const unsigned testNb = 0; + const unsigned seed = 0; + const unsigned cycleNb= 0; + char* testInput = (char*)malloc(testInputSize); + char* testCompressed = (char*)malloc(testCompressedSize); + char* testVerify = (char*)malloc(testInputSize); + char ringBuffer[ringBufferSize] = {0}; + U32 randState = 1; + + /* Init */ + if (!testInput || !testCompressed || !testVerify) { + EXIT_MSG("not enough memory for FUZ_unitTests"); + } + 
FUZ_fillCompressibleNoiseBuffer(testInput, testInputSize, 0.50, &randState); + + /* 32-bits address space overflow test */ + FUZ_AddressOverflow(); + + /* Test decoding with empty input */ + DISPLAYLEVEL(3, "LZ4_decompress_safe() with empty input \n"); + LZ4_decompress_safe(testCompressed, testVerify, 0, testInputSize); + + /* Test decoding with a one byte input */ + DISPLAYLEVEL(3, "LZ4_decompress_safe() with one byte input \n"); + { char const tmp = (char)0xFF; + LZ4_decompress_safe(&tmp, testVerify, 1, testInputSize); + } + + /* Test decoding shortcut edge case */ + DISPLAYLEVEL(3, "LZ4_decompress_safe() with shortcut edge case \n"); + { char tmp[17]; + /* 14 bytes of literals, followed by a 14 byte match. + * Should not read beyond the end of the buffer. + * See https://github.com/lz4/lz4/issues/508. */ + *tmp = (char)0xEE; + memset(tmp + 1, 0, 14); + tmp[15] = 14; + tmp[16] = 0; + { int const r = LZ4_decompress_safe(tmp, testVerify, sizeof(tmp), testInputSize); + FUZ_CHECKTEST(r >= 0, "LZ4_decompress_safe() should fail"); + } } + + + /* to be tested with undefined sanitizer */ + DISPLAYLEVEL(3, "LZ4_compress_default() with NULL input:"); + { int const maxCSize = LZ4_compressBound(0); + int const cSize = LZ4_compress_default(NULL, testCompressed, 0, maxCSize); + FUZ_CHECKTEST(!(cSize==1 && testCompressed[0]==0), + "compressing empty should give byte 0" + " (maxCSize == %i) (cSize == %i) (byte == 0x%02X)", + maxCSize, cSize, testCompressed[0]); + } + DISPLAYLEVEL(3, " OK \n"); + + DISPLAYLEVEL(3, "LZ4_compress_default() with both NULL input and output:"); + { int const cSize = LZ4_compress_default(NULL, NULL, 0, 0); + FUZ_CHECKTEST(cSize != 0, + "compressing into NULL must fail" + " (cSize == %i != 0)", cSize); + } + DISPLAYLEVEL(3, " OK \n"); + + /* in-place compression test */ + DISPLAYLEVEL(3, "in-place compression using LZ4_compress_default() :"); + { int const sampleSize = 65 KB; + int const maxCSize = LZ4_COMPRESSBOUND(sampleSize); + int const outSize = 
LZ4_COMPRESS_INPLACE_BUFFER_SIZE(maxCSize); + int const startInputIndex = outSize - sampleSize; + char* const startInput = testCompressed + startInputIndex; + XXH32_hash_t const crcOrig = XXH32(testInput, sampleSize, 0); + int cSize; + assert(outSize < (int)testCompressedSize); + memcpy(startInput, testInput, sampleSize); /* copy at end of buffer */ + /* compress in-place */ + cSize = LZ4_compress_default(startInput, testCompressed, sampleSize, maxCSize); + assert(cSize != 0); /* ensure compression is successful */ + assert(maxCSize < INT_MAX); + assert(cSize <= maxCSize); + /* decompress and verify */ + { int const dSize = LZ4_decompress_safe(testCompressed, testVerify, cSize, testInputSize); + assert(dSize == sampleSize); /* correct size */ + { XXH32_hash_t const crcCheck = XXH32(testVerify, (size_t)dSize, 0); + FUZ_CHECKTEST(crcCheck != crcOrig, "LZ4_decompress_safe decompression corruption"); + } } } + DISPLAYLEVEL(3, " OK \n"); + + /* in-place decompression test */ + DISPLAYLEVEL(3, "in-place decompression, limit case:"); + { int const sampleSize = 65 KB; + + FUZ_fillCompressibleNoiseBuffer(testInput, sampleSize, 0.0, &randState); + memset(testInput, 0, 267); /* calculated exactly so that compressedSize == originalSize-1 */ + + { XXH64_hash_t const crcOrig = XXH64(testInput, sampleSize, 0); + int const cSize = LZ4_compress_default(testInput, testCompressed, sampleSize, testCompressedSize); + assert(cSize == sampleSize - 1); /* worst case for in-place decompression */ + + { int const bufferSize = LZ4_DECOMPRESS_INPLACE_BUFFER_SIZE(sampleSize); + int const startInputIndex = bufferSize - cSize; + char* const startInput = testVerify + startInputIndex; + memcpy(startInput, testCompressed, cSize); + + /* decompress and verify */ + { int const dSize = LZ4_decompress_safe(startInput, testVerify, cSize, sampleSize); + assert(dSize == sampleSize); /* correct size */ + { XXH64_hash_t const crcCheck = XXH64(testVerify, (size_t)dSize, 0); + FUZ_CHECKTEST(crcCheck != 
crcOrig, "LZ4_decompress_safe decompression corruption"); + } } } } } + DISPLAYLEVEL(3, " OK \n"); + + DISPLAYLEVEL(3, "LZ4_initStream with multiple valid alignments : "); + { typedef struct { + LZ4_stream_t state1; + LZ4_stream_t state2; + char c; + LZ4_stream_t state3; + } shct; + shct* const shc = (shct*)malloc(sizeof(*shc)); + assert(shc != NULL); + memset(shc, 0, sizeof(*shc)); + DISPLAYLEVEL(4, "state1(%p) state2(%p) state3(%p) LZ4_stream_t size(0x%x): ", + &(shc->state1), &(shc->state2), &(shc->state3), (unsigned)sizeof(LZ4_stream_t)); + FUZ_CHECKTEST( LZ4_initStream(&(shc->state1), sizeof(shc->state1)) == NULL, "state1 (%p) failed init", &(shc->state1) ); + FUZ_CHECKTEST( LZ4_initStream(&(shc->state2), sizeof(shc->state2)) == NULL, "state2 (%p) failed init", &(shc->state2) ); + FUZ_CHECKTEST( LZ4_initStream(&(shc->state3), sizeof(shc->state3)) == NULL, "state3 (%p) failed init", &(shc->state3) ); + FUZ_CHECKTEST( LZ4_initStream((char*)&(shc->state1) + 1, sizeof(shc->state1)) != NULL, + "hc1+1 (%p) init must fail, due to bad alignment", (char*)&(shc->state1) + 1 ); + free(shc); + } + DISPLAYLEVEL(3, "all inits OK \n"); + + /* Allocation test */ + { LZ4_stream_t* const statePtr = LZ4_createStream(); + FUZ_CHECKTEST(statePtr==NULL, "LZ4_createStream() allocation failed"); + LZ4_freeStream(statePtr); + } + + /* LZ4 streaming tests */ + { LZ4_stream_t streamingState; + + /* simple compression test */ + LZ4_initStream(&streamingState, sizeof(streamingState)); + { int const cs = LZ4_compress_fast_continue(&streamingState, testInput, testCompressed, testCompressedSize, testCompressedSize-1, 1); + FUZ_CHECKTEST(cs==0, "LZ4_compress_fast_continue() compression failed!"); + { int const r = LZ4_decompress_safe(testCompressed, testVerify, cs, testCompressedSize); + FUZ_CHECKTEST(r!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed"); + } } + { U64 const crcOrig = XXH64(testInput, testCompressedSize, 0); + U64 const crcNew = XXH64(testVerify, 
testCompressedSize, 0); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe() decompression corruption"); + } + + /* early saveDict */ + DISPLAYLEVEL(3, "saveDict (right after init) : "); + { LZ4_stream_t* const ctx = LZ4_initStream(&streamingState, sizeof(streamingState)); + assert(ctx != NULL); /* ensure init is successful */ + + /* Check access violation with asan */ + FUZ_CHECKTEST( LZ4_saveDict(ctx, NULL, 0) != 0, + "LZ4_saveDict() can't save anything into (NULL,0)"); + + /* Check access violation with asan */ + { char tmp_buffer[240] = { 0 }; + FUZ_CHECKTEST( LZ4_saveDict(ctx, tmp_buffer, sizeof(tmp_buffer)) != 0, + "LZ4_saveDict() can't save anything since compression hasn't started"); + } } + DISPLAYLEVEL(3, "OK \n"); + + /* ring buffer test */ + { XXH64_state_t xxhOrig; + XXH64_state_t xxhNewSafe, xxhNewFast; + LZ4_streamDecode_t decodeStateSafe, decodeStateFast; + const U32 maxMessageSizeLog = 10; + const U32 maxMessageSizeMask = (1< ringBufferSize) rNext = 0; + if (dNext + messageSize > dBufferSize) dNext = 0; + } } + } + + DISPLAYLEVEL(3, "LZ4_initStreamHC with multiple valid alignments : "); + { typedef struct { + LZ4_streamHC_t hc1; + LZ4_streamHC_t hc2; + char c; + LZ4_streamHC_t hc3; + } shct; + shct* const shc = (shct*)malloc(sizeof(*shc)); + assert(shc != NULL); + memset(shc, 0, sizeof(*shc)); + DISPLAYLEVEL(4, "hc1(%p) hc2(%p) hc3(%p) size(0x%x): ", + &(shc->hc1), &(shc->hc2), &(shc->hc3), (unsigned)sizeof(LZ4_streamHC_t)); + FUZ_CHECKTEST( LZ4_initStreamHC(&(shc->hc1), sizeof(shc->hc1)) == NULL, "hc1 (%p) failed init", &(shc->hc1) ); + FUZ_CHECKTEST( LZ4_initStreamHC(&(shc->hc2), sizeof(shc->hc2)) == NULL, "hc2 (%p) failed init", &(shc->hc2) ); + FUZ_CHECKTEST( LZ4_initStreamHC(&(shc->hc3), sizeof(shc->hc3)) == NULL, "hc3 (%p) failed init", &(shc->hc3) ); + FUZ_CHECKTEST( LZ4_initStreamHC((char*)&(shc->hc1) + 1, sizeof(shc->hc1)) != NULL, + "hc1+1 (%p) init must fail, due to bad alignment", (char*)&(shc->hc1) + 1 ); + free(shc); + } + 
DISPLAYLEVEL(3, "all inits OK \n"); + + /* LZ4 HC streaming tests */ + { LZ4_streamHC_t sHC; /* statically allocated */ + int result; + LZ4_initStreamHC(&sHC, sizeof(sHC)); + + /* Allocation test */ + DISPLAYLEVEL(3, "Basic HC allocation : "); + { LZ4_streamHC_t* const sp = LZ4_createStreamHC(); + FUZ_CHECKTEST(sp==NULL, "LZ4_createStreamHC() allocation failed"); + LZ4_freeStreamHC(sp); + } + DISPLAYLEVEL(3, "OK \n"); + + /* simple HC compression test */ + DISPLAYLEVEL(3, "Simple HC round-trip : "); + { U64 const crc64 = XXH64(testInput, testCompressedSize, 0); + LZ4_setCompressionLevel(&sHC, compressionLevel); + result = LZ4_compress_HC_continue(&sHC, testInput, testCompressed, testCompressedSize, testCompressedSize-1); + FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() compression failed"); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + + result = LZ4_decompress_safe(testCompressed, testVerify, result, testCompressedSize); + FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe() decompression failed"); + { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0); + FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe() decompression corruption"); + } } + DISPLAYLEVEL(3, "OK \n"); + + /* saveDictHC test #926 */ + DISPLAYLEVEL(3, "saveDictHC test #926 : "); + { LZ4_streamHC_t* const ctx = LZ4_initStreamHC(&sHC, sizeof(sHC)); + assert(ctx != NULL); /* ensure init is successful */ + + /* Check access violation with asan */ + FUZ_CHECKTEST( LZ4_saveDictHC(ctx, NULL, 0) != 0, + "LZ4_saveDictHC() can't save anything into (NULL,0)"); + + /* Check access violation with asan */ + { char tmp_buffer[240] = { 0 }; + FUZ_CHECKTEST( LZ4_saveDictHC(ctx, tmp_buffer, sizeof(tmp_buffer)) != 0, + "LZ4_saveDictHC() can't save anything since compression hasn't started"); + } } + DISPLAYLEVEL(3, "OK \n"); + + /* long sequence test */ + DISPLAYLEVEL(3, "Long sequence HC_destSize test : "); + { size_t const blockSize = 1 MB; + 
size_t const targetSize = 4116; /* size carefully selected to trigger an overflow */ + void* const block = malloc(blockSize); + void* const dstBlock = malloc(targetSize+1); + BYTE const sentinel = 101; + int srcSize; + + assert(block != NULL); assert(dstBlock != NULL); + memset(block, 0, blockSize); + ((char*)dstBlock)[targetSize] = sentinel; + + LZ4_resetStreamHC_fast(&sHC, 3); + assert(blockSize < INT_MAX); + srcSize = (int)blockSize; + assert(targetSize < INT_MAX); + result = LZ4_compress_HC_destSize(&sHC, (const char*)block, (char*)dstBlock, &srcSize, (int)targetSize, 3); + DISPLAYLEVEL(4, "cSize=%i; readSize=%i; ", result, srcSize); + FUZ_CHECKTEST(result != 4116, "LZ4_compress_HC_destSize() : " + "compression (%i->%i) must fill dstBuffer (%i) exactly", + srcSize, result, (int)targetSize); + FUZ_CHECKTEST(((char*)dstBlock)[targetSize] != sentinel, + "LZ4_compress_HC_destSize() overwrites dst buffer"); + FUZ_CHECKTEST(srcSize < 1045000, "LZ4_compress_HC_destSize() doesn't compress enough" + " (%i -> %i , expected > %i)", srcSize, result, 1045000); + + LZ4_resetStreamHC_fast(&sHC, 3); /* make sure the context is clean after the test */ + free(block); + free(dstBlock); + } + DISPLAYLEVEL(3, " OK \n"); + + /* simple dictionary HC compression test */ + DISPLAYLEVEL(3, "HC dictionary compression test : "); + { U64 const crc64 = XXH64(testInput + 64 KB, testCompressedSize, 0); + LZ4_resetStreamHC_fast(&sHC, compressionLevel); + LZ4_loadDictHC(&sHC, testInput, 64 KB); + { int const cSize = LZ4_compress_HC_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1); + FUZ_CHECKTEST(cSize==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : @return = %i", cSize); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + { int const dSize = LZ4_decompress_safe_usingDict(testCompressed, testVerify, cSize, testCompressedSize, testInput, 64 KB); + FUZ_CHECKTEST(dSize!=(int)testCompressedSize, 
"LZ4_decompress_safe() simple dictionary decompression test failed"); + } } + { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0); + FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe() simple dictionary decompression test : corruption"); + } } + DISPLAYLEVEL(3, " OK \n"); + + /* multiple HC compression test with dictionary */ + { int result1, result2; + int segSize = testCompressedSize / 2; + XXH64_hash_t const crc64 = ( (void)assert((unsigned)segSize + testCompressedSize < testInputSize) , + XXH64(testInput + segSize, testCompressedSize, 0) ); + LZ4_resetStreamHC_fast(&sHC, compressionLevel); + LZ4_loadDictHC(&sHC, testInput, segSize); + result1 = LZ4_compress_HC_continue(&sHC, testInput + segSize, testCompressed, segSize, segSize -1); + FUZ_CHECKTEST(result1==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result1); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + result2 = LZ4_compress_HC_continue(&sHC, testInput + 2*(size_t)segSize, testCompressed+result1, segSize, segSize-1); + FUZ_CHECKTEST(result2==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result2); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + + result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result1, segSize, testInput, segSize); + FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 1 failed"); + result = LZ4_decompress_safe_usingDict(testCompressed+result1, testVerify+segSize, result2, segSize, testInput, 2*segSize); + FUZ_CHECKTEST(result!=segSize, "LZ4_decompress_safe() dictionary decompression part 2 failed"); + { XXH64_hash_t const crcNew = XXH64(testVerify, testCompressedSize, 0); + FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe() dictionary decompression corruption"); + } } + + /* remote dictionary HC compression test */ + { U64 const crc64 = XXH64(testInput + 64 KB, testCompressedSize, 0); + 
LZ4_resetStreamHC_fast(&sHC, compressionLevel); + LZ4_loadDictHC(&sHC, testInput, 32 KB); + result = LZ4_compress_HC_continue(&sHC, testInput + 64 KB, testCompressed, testCompressedSize, testCompressedSize-1); + FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() remote dictionary failed : result = %i", result); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + + result = LZ4_decompress_safe_usingDict(testCompressed, testVerify, result, testCompressedSize, testInput, 32 KB); + FUZ_CHECKTEST(result!=(int)testCompressedSize, "LZ4_decompress_safe_usingDict() decompression failed following remote dictionary HC compression test"); + { U64 const crcNew = XXH64(testVerify, testCompressedSize, 0); + FUZ_CHECKTEST(crc64!=crcNew, "LZ4_decompress_safe_usingDict() decompression corruption"); + } } + + /* multiple HC compression with ext. dictionary */ + { XXH64_state_t crcOrigState; + XXH64_state_t crcNewState; + const char* dict = testInput + 3; + size_t dictSize = (FUZ_rand(&randState) & 8191); + char* dst = testVerify; + + size_t segStart = dictSize + 7; + size_t segSize = (FUZ_rand(&randState) & 8191); + int segNb = 1; + + LZ4_resetStreamHC_fast(&sHC, compressionLevel); + LZ4_loadDictHC(&sHC, dict, (int)dictSize); + + XXH64_reset(&crcOrigState, 0); + XXH64_reset(&crcNewState, 0); + + while (segStart + segSize < testInputSize) { + XXH64_hash_t crcOrig; + XXH64_update(&crcOrigState, testInput + segStart, segSize); + crcOrig = XXH64_digest(&crcOrigState); + assert(segSize <= INT_MAX); + result = LZ4_compress_HC_continue(&sHC, testInput + segStart, testCompressed, (int)segSize, LZ4_compressBound((int)segSize)); + FUZ_CHECKTEST(result==0, "LZ4_compressHC_limitedOutput_continue() dictionary compression failed : result = %i", result); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + + result = LZ4_decompress_safe_usingDict(testCompressed, dst, result, (int)segSize, dict, (int)dictSize); + 
FUZ_CHECKTEST(result!=(int)segSize, "LZ4_decompress_safe_usingDict() dictionary decompression part %i failed", (int)segNb); + XXH64_update(&crcNewState, dst, segSize); + { U64 const crcNew = XXH64_digest(&crcNewState); + if (crcOrig != crcNew) FUZ_findDiff(dst, testInput+segStart); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_usingDict() part %i corruption", segNb); + } + + dict = dst; + dictSize = segSize; + + dst += segSize + 1; + segNb ++; + + segStart += segSize + (FUZ_rand(&randState) & 0xF) + 1; + segSize = (FUZ_rand(&randState) & 8191); + } } + + /* ring buffer test */ + { XXH64_state_t xxhOrig; + XXH64_state_t xxhNewSafe, xxhNewFast; + LZ4_streamDecode_t decodeStateSafe, decodeStateFast; + const U32 maxMessageSizeLog = 10; + const U32 maxMessageSizeMask = (1< ringBufferSize) rNext = 0; + if (dNext + messageSize > dBufferSize) dNext = 0; + } + } + + /* Ring buffer test : Non synchronized decoder */ + /* This test uses minimum amount of memory required to setup a decoding ring buffer + * while being unsynchronized with encoder + * (no assumption done on how the data is encoded, it just follows LZ4 format specification). + * This size is documented in lz4.h, and is LZ4_decoderRingBufferSize(maxBlockSize). 
+ */ + { XXH64_state_t xxhOrig; + XXH64_state_t xxhNewSafe, xxhNewFast; + XXH64_hash_t crcOrig; + LZ4_streamDecode_t decodeStateSafe, decodeStateFast; + const int maxMessageSizeLog = 12; + const int maxMessageSize = 1 << maxMessageSizeLog; + const int maxMessageSizeMask = maxMessageSize - 1; + int messageSize; + U32 totalMessageSize = 0; + const int dBufferSize = LZ4_decoderRingBufferSize(maxMessageSize); + char* const ringBufferSafe = testVerify; + char* const ringBufferFast = testVerify + dBufferSize + 1; /* used by LZ4_decompress_fast_continue */ + int iNext = 0; + int dNext = 0; + int compressedSize; + + assert((size_t)dBufferSize * 2 + 1 < testInputSize); /* space used by ringBufferSafe and ringBufferFast */ + XXH64_reset(&xxhOrig, 0); + XXH64_reset(&xxhNewSafe, 0); + XXH64_reset(&xxhNewFast, 0); + LZ4_resetStreamHC_fast(&sHC, compressionLevel); + LZ4_setStreamDecode(&decodeStateSafe, NULL, 0); + LZ4_setStreamDecode(&decodeStateFast, NULL, 0); + +#define BSIZE1 (dBufferSize - (maxMessageSize-1)) + + /* first block */ + messageSize = BSIZE1; /* note : we cheat a bit here, in theory no message should be > maxMessageSize. We just want to fill the decoding ring buffer once. 
*/ + XXH64_update(&xxhOrig, testInput + iNext, (size_t)messageSize); + crcOrig = XXH64_digest(&xxhOrig); + + compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, messageSize, testCompressedSize-ringBufferSize); + FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed"); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + + result = LZ4_decompress_safe_continue(&decodeStateSafe, testCompressed, ringBufferSafe + dNext, compressedSize, messageSize); + FUZ_CHECKTEST(result!=messageSize, "64K D.ringBuffer : LZ4_decompress_safe_continue() test failed"); + + XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, (size_t)messageSize); + { U64 const crcNew = XXH64_digest(&xxhNewSafe); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption"); } + + result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize); + FUZ_CHECKTEST(result!=compressedSize, "64K D.ringBuffer : LZ4_decompress_fast_continue() test failed"); + + XXH64_update(&xxhNewFast, ringBufferFast + dNext, (size_t)messageSize); + { U64 const crcNew = XXH64_digest(&xxhNewFast); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption"); } + + /* prepare second message */ + dNext += messageSize; + assert(messageSize >= 0); + totalMessageSize += (unsigned)messageSize; + messageSize = maxMessageSize; + iNext = BSIZE1+1; + assert(BSIZE1 >= 65535); + memcpy(testInput + iNext, testInput + (BSIZE1-65535), messageSize); /* will generate a match at max distance == 65535 */ + FUZ_CHECKTEST(dNext+messageSize <= dBufferSize, "Ring buffer test : second message should require restarting from beginning"); + dNext = 0; + + while (totalMessageSize < 9 MB) { + XXH64_update(&xxhOrig, testInput + iNext, (size_t)messageSize); + crcOrig = XXH64_digest(&xxhOrig); + + compressedSize = LZ4_compress_HC_continue(&sHC, testInput + iNext, testCompressed, 
messageSize, testCompressedSize-ringBufferSize); + FUZ_CHECKTEST(compressedSize==0, "LZ4_compress_HC_continue() compression failed"); + FUZ_CHECKTEST(sHC.internal_donotuse.dirty, "Context should be clean"); + DISPLAYLEVEL(5, "compressed %i bytes to %i bytes \n", messageSize, compressedSize); + + /* test LZ4_decompress_safe_continue */ + assert(dNext < dBufferSize); + assert(dBufferSize - dNext >= maxMessageSize); + result = LZ4_decompress_safe_continue(&decodeStateSafe, + testCompressed, ringBufferSafe + dNext, + compressedSize, dBufferSize - dNext); /* works without knowing messageSize, under assumption that messageSize <= maxMessageSize */ + FUZ_CHECKTEST(result!=messageSize, "D.ringBuffer : LZ4_decompress_safe_continue() test failed"); + XXH64_update(&xxhNewSafe, ringBufferSafe + dNext, (size_t)messageSize); + { U64 const crcNew = XXH64_digest(&xxhNewSafe); + if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferSafe + dNext); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_safe_continue() decompression corruption during D.ringBuffer test"); + } + + /* test LZ4_decompress_fast_continue in its own buffer ringBufferFast */ + result = LZ4_decompress_fast_continue(&decodeStateFast, testCompressed, ringBufferFast + dNext, messageSize); + FUZ_CHECKTEST(result!=compressedSize, "D.ringBuffer : LZ4_decompress_fast_continue() test failed"); + XXH64_update(&xxhNewFast, ringBufferFast + dNext, (size_t)messageSize); + { U64 const crcNew = XXH64_digest(&xxhNewFast); + if (crcOrig != crcNew) FUZ_findDiff(testInput + iNext, ringBufferFast + dNext); + FUZ_CHECKTEST(crcOrig!=crcNew, "LZ4_decompress_fast_continue() decompression corruption during D.ringBuffer test"); + } + + /* prepare next message */ + dNext += messageSize; + assert(messageSize >= 0); + totalMessageSize += (unsigned)messageSize; + messageSize = (FUZ_rand(&randState) & maxMessageSizeMask) + 1; + iNext = (FUZ_rand(&randState) & 65535); + if (dNext + maxMessageSize > dBufferSize) dNext = 0; + } + } /* 
Ring buffer test : Non synchronized decoder */ + } + + DISPLAYLEVEL(3, "LZ4_compress_HC_destSize : "); + /* encode congenerical sequence test for HC compressors */ + { LZ4_streamHC_t* const sHC = LZ4_createStreamHC(); + int const src_buf_size = 3 MB; + int const dst_buf_size = 6 KB; + int const payload = 0; + int const dst_step = 43; + int const dst_min_len = 33 + (FUZ_rand(&randState) % dst_step); + int const dst_max_len = 5000; + int slen, dlen; + char* sbuf1 = (char*)malloc(src_buf_size + 1); + char* sbuf2 = (char*)malloc(src_buf_size + 1); + char* dbuf1 = (char*)malloc(dst_buf_size + 1); + char* dbuf2 = (char*)malloc(dst_buf_size + 1); + + assert(sHC != NULL); + assert(dst_buf_size > dst_max_len); + if (!sbuf1 || !sbuf2 || !dbuf1 || !dbuf2) { + EXIT_MSG("not enough memory for FUZ_unitTests (destSize)"); + } + for (dlen = dst_min_len; dlen <= dst_max_len; dlen += dst_step) { + int src_len = (dlen - 10)*255 + 24; + if (src_len + 10 >= src_buf_size) break; /* END of check */ + for (slen = src_len - 3; slen <= src_len + 3; slen++) { + int srcsz1, srcsz2; + int dsz1, dsz2; + int res1, res2; + char const endchk = (char)0x88; + DISPLAYLEVEL(5, "slen = %i, ", slen); + + srcsz1 = slen; + memset(sbuf1, payload, slen); + memset(dbuf1, 0, dlen); + dbuf1[dlen] = endchk; + dsz1 = LZ4_compress_destSize(sbuf1, dbuf1, &srcsz1, dlen); + DISPLAYLEVEL(5, "LZ4_compress_destSize: %i bytes compressed into %i bytes, ", srcsz1, dsz1); + DISPLAYLEVEL(5, "last token : 0x%0X, ", dbuf1[dsz1 - 6]); + DISPLAYLEVEL(5, "last ML extra lenbyte : 0x%0X, \n", dbuf1[dsz1 - 7]); + FUZ_CHECKTEST(dbuf1[dlen] != endchk, "LZ4_compress_destSize() overwrite dst buffer !"); + FUZ_CHECKTEST(dsz1 <= 0, "LZ4_compress_destSize() compression failed"); + FUZ_CHECKTEST(dsz1 > dlen, "LZ4_compress_destSize() result larger than dst buffer !"); + FUZ_CHECKTEST(srcsz1 > slen, "LZ4_compress_destSize() read more than src buffer !"); + + res1 = LZ4_decompress_safe(dbuf1, sbuf1, dsz1, src_buf_size); + FUZ_CHECKTEST(res1 
!= srcsz1, "LZ4_compress_destSize() decompression failed!"); + + srcsz2 = slen; + memset(sbuf2, payload, slen); + memset(dbuf2, 0, dlen); + dbuf2[dlen] = endchk; + LZ4_resetStreamHC(sHC, compressionLevel); + dsz2 = LZ4_compress_HC_destSize(sHC, sbuf2, dbuf2, &srcsz2, dlen, compressionLevel); + DISPLAYLEVEL(5, "LZ4_compress_HC_destSize: %i bytes compressed into %i bytes, ", srcsz2, dsz2); + DISPLAYLEVEL(5, "last token : 0x%0X, ", dbuf2[dsz2 - 6]); + DISPLAYLEVEL(5, "last ML extra lenbyte : 0x%0X, \n", dbuf2[dsz2 - 7]); + FUZ_CHECKTEST(dbuf2[dlen] != endchk, "LZ4_compress_HC_destSize() overwrite dst buffer !"); + FUZ_CHECKTEST(dsz2 <= 0, "LZ4_compress_HC_destSize() compression failed"); + FUZ_CHECKTEST(dsz2 > dlen, "LZ4_compress_HC_destSize() result larger than dst buffer !"); + FUZ_CHECKTEST(srcsz2 > slen, "LZ4_compress_HC_destSize() read more than src buffer !"); + FUZ_CHECKTEST(dsz2 != dsz1, "LZ4_compress_HC_destSize() return incorrect result !"); + FUZ_CHECKTEST(srcsz2 != srcsz1, "LZ4_compress_HC_destSize() return incorrect src buffer size " + ": srcsz2(%i) != srcsz1(%i)", srcsz2, srcsz1); + FUZ_CHECKTEST(memcmp(dbuf2, dbuf1, (size_t)dsz2), "LZ4_compress_HC_destSize() return incorrect data into dst buffer !"); + + res2 = LZ4_decompress_safe(dbuf2, sbuf1, dsz2, src_buf_size); + FUZ_CHECKTEST(res2 != srcsz1, "LZ4_compress_HC_destSize() decompression failed!"); + + FUZ_CHECKTEST(memcmp(sbuf1, sbuf2, (size_t)res2), "LZ4_compress_HC_destSize() decompression corruption!"); + } + } + LZ4_freeStreamHC(sHC); + free(sbuf1); + free(sbuf2); + free(dbuf1); + free(dbuf2); + } + DISPLAYLEVEL(3, " OK \n"); + + + /* clean up */ + free(testInput); + free(testCompressed); + free(testVerify); + + printf("All unit tests completed successfully compressionLevel=%d \n", compressionLevel); + return; +} + + + +/* ======================================= + * CLI + * ======================================= */ + +static int FUZ_usage(const char* programName) +{ + DISPLAY( "Usage :\n"); + 
DISPLAY( " %s [args]\n", programName); + DISPLAY( "\n"); + DISPLAY( "Arguments :\n"); + DISPLAY( " -i# : Nb of tests (default:%i) \n", NB_ATTEMPTS); + DISPLAY( " -T# : Duration of tests, in seconds (default: use Nb of tests) \n"); + DISPLAY( " -s# : Select seed (default:prompt user)\n"); + DISPLAY( " -t# : Select starting test number (default:0)\n"); + DISPLAY( " -P# : Select compressibility in %% (default:%i%%)\n", FUZ_COMPRESSIBILITY_DEFAULT); + DISPLAY( " -v : verbose\n"); + DISPLAY( " -p : pause at the end\n"); + DISPLAY( " -h : display help and exit\n"); + return 0; +} + + +int main(int argc, const char** argv) +{ + U32 seed = 0; + int seedset = 0; + int argNb; + unsigned nbTests = NB_ATTEMPTS; + unsigned testNb = 0; + int proba = FUZ_COMPRESSIBILITY_DEFAULT; + int use_pause = 0; + const char* programName = argv[0]; + U32 duration = 0; + + /* Check command line */ + for(argNb=1; argNb ='0') && (*argument<='9')) { + nbTests *= 10; + nbTests += (unsigned)(*argument - '0'); + argument++; + } + break; + + case 'T': + argument++; + nbTests = 0; duration = 0; + for (;;) { + switch(*argument) + { + case 'm': duration *= 60; argument++; continue; + case 's': + case 'n': argument++; continue; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': duration *= 10; duration += (U32)(*argument++ - '0'); continue; + } + break; + } + break; + + case 's': + argument++; + seed=0; seedset=1; + while ((*argument>='0') && (*argument<='9')) { + seed *= 10; + seed += (U32)(*argument - '0'); + argument++; + } + break; + + case 't': /* select starting test nb */ + argument++; + testNb=0; + while ((*argument>='0') && (*argument<='9')) { + testNb *= 10; + testNb += (unsigned)(*argument - '0'); + argument++; + } + break; + + case 'P': /* change probability */ + argument++; + proba=0; + while ((*argument>='0') && (*argument<='9')) { + proba *= 10; + proba += *argument - '0'; + argument++; + } + if (proba<0) proba=0; + if 
(proba>100) proba=100; + break; + default: ; + } + } + } + } + + printf("Starting LZ4 fuzzer (%i-bits, v%s)\n", (int)(sizeof(size_t)*8), LZ4_versionString()); + + if (!seedset) { + time_t const t = time(NULL); + U32 const h = XXH32(&t, sizeof(t), 1); + seed = h % 10000; + } + printf("Seed = %u\n", seed); + + if (proba!=FUZ_COMPRESSIBILITY_DEFAULT) printf("Compressibility : %i%%\n", proba); + + if ((seedset==0) && (testNb==0)) { FUZ_unitTests(LZ4HC_CLEVEL_DEFAULT); FUZ_unitTests(LZ4HC_CLEVEL_OPT_MIN); } + + nbTests += (nbTests==0); /* avoid zero */ + + { int const result = FUZ_test(seed, nbTests, testNb, ((double)proba) / 100, duration); + if (use_pause) { + DISPLAY("press enter ... \n"); + (void)getchar(); + } + return result; + } +} diff --git a/tests/roundTripTest.c b/tests/roundTripTest.c new file mode 100644 index 0000000..2d34451 --- /dev/null +++ b/tests/roundTripTest.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This program takes a file in input, + * performs an LZ4 round-trip test (compress + decompress) + * compares the result with original + * and generates an abort() on corruption detection, + * in order for afl to register the event as a crash. 
+*/ + + +/*=========================================== +* Tuning Constant +*==========================================*/ +#ifndef MIN_CLEVEL +# define MIN_CLEVEL (int)(-5) +#endif + + + +/*=========================================== +* Dependencies +*==========================================*/ +#include /* size_t */ +#include /* malloc, free, exit */ +#include /* fprintf */ +#include /* strcmp */ +#include +#include /* stat */ +#include /* stat */ +#include "xxhash.h" + +#include "lz4.h" +#include "lz4hc.h" + + +/*=========================================== +* Macros +*==========================================*/ +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MSG(...) fprintf(stderr, __VA_ARGS__) + +#define CONTROL_MSG(c, ...) { \ + if ((c)) { \ + MSG(__VA_ARGS__); \ + MSG(" \n"); \ + abort(); \ + } \ +} + + +static size_t checkBuffers(const void* buff1, const void* buff2, size_t buffSize) +{ + const char* const ip1 = (const char*)buff1; + const char* const ip2 = (const char*)buff2; + size_t pos; + + for (pos=0; pos = LZ4_compressBound(srcSize)` + * for compression to be guaranteed to work */ +static void roundTripTest(void* resultBuff, size_t resultBuffCapacity, + void* compressedBuff, size_t compressedBuffCapacity, + const void* srcBuff, size_t srcSize, + int clevel) +{ + int const proposed_clevel = clevel ? clevel : select_clevel(srcBuff, srcSize); + int const selected_clevel = proposed_clevel < 0 ? -proposed_clevel : proposed_clevel; /* if level < 0, it becomes an accelearion value */ + compressFn compress = selected_clevel >= LZ4HC_CLEVEL_MIN ? 
LZ4_compress_HC : LZ4_compress_fast; + int const cSize = compress((const char*)srcBuff, (char*)compressedBuff, (int)srcSize, (int)compressedBuffCapacity, selected_clevel); + CONTROL_MSG(cSize == 0, "Compression error !"); + + { int const dSize = LZ4_decompress_safe((const char*)compressedBuff, (char*)resultBuff, cSize, (int)resultBuffCapacity); + CONTROL_MSG(dSize < 0, "Decompression detected an error !"); + CONTROL_MSG(dSize != (int)srcSize, "Decompression corruption error : wrong decompressed size !"); + } + + /* check potential content corruption error */ + assert(resultBuffCapacity >= srcSize); + { size_t const errorPos = checkBuffers(srcBuff, resultBuff, srcSize); + CONTROL_MSG(errorPos != srcSize, + "Silent decoding corruption, at pos %u !!!", + (unsigned)errorPos); + } + +} + +static void roundTripCheck(const void* srcBuff, size_t srcSize, int clevel) +{ + size_t const cBuffSize = LZ4_compressBound((int)srcSize); + void* const cBuff = malloc(cBuffSize); + void* const rBuff = malloc(cBuffSize); + + if (!cBuff || !rBuff) { + fprintf(stderr, "not enough memory ! \n"); + exit(1); + } + + roundTripTest(rBuff, cBuffSize, + cBuff, cBuffSize, + srcBuff, srcSize, + clevel); + + free(rBuff); + free(cBuff); +} + + +static size_t getFileSize(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (r || !(statbuf.st_mode & S_IFREG)) return 0; /* No good... */ +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... 
*/ +#endif + return (size_t)statbuf.st_size; +} + + +static int isDirectory(const char* infilename) +{ + int r; +#if defined(_MSC_VER) + struct _stat64 statbuf; + r = _stat64(infilename, &statbuf); + if (!r && (statbuf.st_mode & _S_IFDIR)) return 1; +#else + struct stat statbuf; + r = stat(infilename, &statbuf); + if (!r && S_ISDIR(statbuf.st_mode)) return 1; +#endif + return 0; +} + + +/** loadFile() : + * requirement : `buffer` size >= `fileSize` */ +static void loadFile(void* buffer, const char* fileName, size_t fileSize) +{ + FILE* const f = fopen(fileName, "rb"); + if (isDirectory(fileName)) { + MSG("Ignoring %s directory \n", fileName); + exit(2); + } + if (f==NULL) { + MSG("Impossible to open %s \n", fileName); + exit(3); + } + { size_t const readSize = fread(buffer, 1, fileSize, f); + if (readSize != fileSize) { + MSG("Error reading %s \n", fileName); + exit(5); + } } + fclose(f); +} + + +static void fileCheck(const char* fileName, int clevel) +{ + size_t const fileSize = getFileSize(fileName); + void* const buffer = malloc(fileSize + !fileSize /* avoid 0 */); + if (!buffer) { + MSG("not enough memory \n"); + exit(4); + } + loadFile(buffer, fileName, fileSize); + roundTripCheck(buffer, fileSize, clevel); + free (buffer); +} + + +int bad_usage(const char* exeName) +{ + MSG(" \n"); + MSG("bad usage: \n"); + MSG(" \n"); + MSG("%s [Options] fileName \n", exeName); + MSG(" \n"); + MSG("Options: \n"); + MSG("-# : use #=[0-9] compression level (default:0 == random) \n"); + return 1; +} + + +int main(int argCount, const char** argv) +{ + const char* const exeName = argv[0]; + int argNb = 1; + int clevel = 0; + + assert(argCount >= 1); + if (argCount < 2) return bad_usage(exeName); + + if (argv[1][0] == '-') { + clevel = argv[1][1] - '0'; + argNb = 2; + } + + if (argNb >= argCount) return bad_usage(exeName); + + fileCheck(argv[argNb], clevel); + MSG("no pb detected \n"); + return 0; +} diff --git a/tests/test-lz4-list.py b/tests/test-lz4-list.py new file mode 100644 
index 0000000..ce89757 --- /dev/null +++ b/tests/test-lz4-list.py @@ -0,0 +1,282 @@ +#! /usr/bin/env python3 +import subprocess +import time +import glob +import os +import tempfile +import unittest + +SIZES = [3, 11] # Always 2 sizes +MIB = 1048576 +LZ4 = os.path.dirname(os.path.realpath(__file__)) + "/../lz4" +if not os.path.exists(LZ4): + LZ4 = os.path.dirname(os.path.realpath(__file__)) + "/../programs/lz4" +TEMP = tempfile.gettempdir() + + +class NVerboseFileInfo(object): + def __init__(self, line_in): + self.line = line_in + splitlines = line_in.split() + if len(splitlines) != 7: + errout("Unexpected line: {}".format(line_in)) + self.frames, self.type, self.block, self.compressed, self.uncompressed, self.ratio, self.filename = splitlines + self.exp_unc_size = 0 + # Get real file sizes + if "concat-all" in self.filename or "2f--content-size" in self.filename: + for i in SIZES: + self.exp_unc_size += os.path.getsize("{}/test_list_{}M".format(TEMP, i)) + else: + uncompressed_filename = self.filename.split("-")[0] + self.exp_unc_size += os.path.getsize("{}/{}".format(TEMP, uncompressed_filename)) + self.exp_comp_size = os.path.getsize("{}/{}".format(TEMP, self.filename)) + + +class TestNonVerbose(unittest.TestCase): + @classmethod + def setUpClass(self): + self.nvinfo_list = [] + for i, line in enumerate(execute("{} --list -m {}/test_list_*.lz4".format(LZ4, TEMP), print_output=True)): + if i > 0: + self.nvinfo_list.append(NVerboseFileInfo(line)) + + def test_frames(self): + all_concat_frames = 0 + all_concat_index = None + for i, nvinfo in enumerate(self.nvinfo_list): + if "concat-all" in nvinfo.filename: + all_concat_index = i + elif "2f--content-size" in nvinfo.filename: + self.assertEqual("2", nvinfo.frames, nvinfo.line) + all_concat_frames += 2 + else: + self.assertEqual("1", nvinfo.frames, nvinfo.line) + all_concat_frames += 1 + self.assertNotEqual(None, all_concat_index, "Couldn't find concat-all file index.") + 
self.assertEqual(self.nvinfo_list[all_concat_index].frames, str(all_concat_frames), self.nvinfo_list[all_concat_index].line) + + def test_frame_types(self): + for nvinfo in self.nvinfo_list: + if "-lz4f-" in nvinfo.filename: + self.assertEqual(nvinfo.type, "LZ4Frame", nvinfo.line) + elif "-legc-" in nvinfo.filename: + self.assertEqual(nvinfo.type, "LegacyFrame", nvinfo.line) + elif "-skip-" in nvinfo.filename: + self.assertEqual(nvinfo.type, "SkippableFrame", nvinfo.line) + + def test_block(self): + for nvinfo in self.nvinfo_list: + # if "-leg" in nvinfo.filename or "-skip" in nvinfo.filename: + # self.assertEqual(nvinfo.block, "-", nvinfo.line) + if "--BD" in nvinfo.filename: + self.assertRegex(nvinfo.block, "^B[0-9]+D$", nvinfo.line) + elif "--BI" in nvinfo.filename: + self.assertRegex(nvinfo.block, "^B[0-9]+I$", nvinfo.line) + + def test_compressed_size(self): + for nvinfo in self.nvinfo_list: + self.assertEqual(nvinfo.compressed, to_human(nvinfo.exp_comp_size), nvinfo.line) + + def test_ratio(self): + for nvinfo in self.nvinfo_list: + if "--content-size" in nvinfo.filename: + self.assertEqual(nvinfo.ratio, "{:.2f}%".format(float(nvinfo.exp_comp_size) / float(nvinfo.exp_unc_size) * 100), nvinfo.line) + + def test_uncompressed_size(self): + for nvinfo in self.nvinfo_list: + if "--content-size" in nvinfo.filename: + self.assertEqual(nvinfo.uncompressed, to_human(nvinfo.exp_unc_size), nvinfo.line) + + +class VerboseFileInfo(object): + def __init__(self, lines): + # Parse lines + self.frame_list = [] + self.file_frame_map = [] + for i, line in enumerate(lines): + if i == 0: + self.filename = line + continue + elif i == 1: + # Skip header + continue + frame_info = dict(zip(["frame", "type", "block", "checksum", "compressed", "uncompressed", "ratio"], line.split())) + frame_info["line"] = line + self.frame_list.append(frame_info) + + +class TestVerbose(unittest.TestCase): + @classmethod + def setUpClass(self): + # Even do we're listing 2 files to test multiline 
working as expected. + # we're only really interested in testing the output of the concat-all file. + self.vinfo_list = [] + start = end = 0 + output = execute("{} --list -m -v {}/test_list_concat-all.lz4 {}/test_list_*M-lz4f-2f--content-size.lz4".format(LZ4, TEMP, TEMP), print_output=True) + for i, line in enumerate(output): + if line.startswith("test_list"): + if start != 0 and end != 0: + self.vinfo_list.append(VerboseFileInfo(output[start:end])) + start = i + if not line: + end = i + self.vinfo_list.append(VerboseFileInfo(output[start:end])) + # Populate file_frame_map as a reference of the expected info + concat_file_list = glob.glob("/tmp/test_list_[!concat]*.lz4") + # One of the files has 2 frames so duplicate it in this list to map each frame 1 to a single file + for i, filename in enumerate(concat_file_list): + if "2f--content-size" in filename: + concat_file_list.insert(i, filename) + break + self.cvinfo = self.vinfo_list[0] + self.cvinfo.file_frame_map = concat_file_list + self.cvinfo.compressed_size = os.path.getsize("{}/test_list_concat-all.lz4".format(TEMP)) + + def test_filename(self): + for i, vinfo in enumerate(self.vinfo_list): + self.assertRegex(vinfo.filename, "^test_list_.*({}/{})".format(i + 1, len(self.vinfo_list))) + + def test_frame_number(self): + for vinfo in self.vinfo_list: + for i, frame_info in enumerate(vinfo.frame_list): + self.assertEqual(frame_info["frame"], str(i + 1), frame_info["line"]) + + def test_frame_type(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "-lz4f-" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["type"], "LZ4Frame", self.cvinfo.frame_list[i]["line"]) + elif "-legc-" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["type"], "LegacyFrame", self.cvinfo.frame_list[i]["line"]) + elif "-skip-" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["type"], "SkippableFrame", self.cvinfo.frame_list[i]["line"]) + 
+ def test_block(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "--BD" in self.cvinfo.file_frame_map[i]: + self.assertRegex(self.cvinfo.frame_list[i]["block"], "^B[0-9]+D$", self.cvinfo.frame_list[i]["line"]) + elif "--BI" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["block"], "^B[0-9]+I$", self.cvinfo.frame_list[i]["line"]) + + def test_checksum(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "-lz4f-" in self.cvinfo.file_frame_map[i] and "--no-frame-crc" not in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]["checksum"], "XXH32", self.cvinfo.frame_list[i]["line"]) + + def test_compressed(self): + total = 0 + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "-2f-" not in self.cvinfo.file_frame_map[i]: + expected_size = os.path.getsize(self.cvinfo.file_frame_map[i]) + self.assertEqual(self.cvinfo.frame_list[i]["compressed"], str(expected_size), self.cvinfo.frame_list[i]["line"]) + total += int(self.cvinfo.frame_list[i]["compressed"]) + self.assertEqual(total, self.cvinfo.compressed_size, "Expected total sum ({}) to match {} filesize".format(total, self.cvinfo.filename)) + + def test_uncompressed(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + ffm = self.cvinfo.file_frame_map[i] + if "-2f-" not in ffm and "--content-size" in ffm: + expected_size_unc = int(ffm[ffm.rindex("_") + 1:ffm.index("M")]) * 1048576 + self.assertEqual(self.cvinfo.frame_list[i]["uncompressed"], str(expected_size_unc), self.cvinfo.frame_list[i]["line"]) + + def test_ratio(self): + for i, frame_info in enumerate(self.cvinfo.frame_list): + if "--content-size" in self.cvinfo.file_frame_map[i]: + self.assertEqual(self.cvinfo.frame_list[i]['ratio'], + "{:.2f}%".format(float(self.cvinfo.frame_list[i]['compressed']) / float(self.cvinfo.frame_list[i]['uncompressed']) * 100), + self.cvinfo.frame_list[i]["line"]) + + +def to_human(size): + for unit in ['', 'K', 'M', 'G', 
'T', 'P', 'E', 'Z', 'Y']: + if size < 1024.0: + break + size /= 1024.0 + return "{:.2f}{}".format(size, unit) + + +def log(text): + print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text) + + +def errout(text, err=1): + log(text) + exit(err) + + +def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True): + if os.environ.get('QEMU_SYS'): + command = "{} {}".format(os.environ['QEMU_SYS'], command) + if print_command: + log("> " + command) + popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell) + stdout_lines, stderr_lines = popen.communicate() + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") + if print_output: + if stdout_lines: + print(stdout_lines) + if stderr_lines: + print(stderr_lines) + if popen.returncode is not None and popen.returncode != 0: + if stderr_lines and not print_output and print_error: + print(stderr_lines) + errout("Failed to run: {}\n".format(command, stdout_lines + stderr_lines)) + return (stdout_lines + stderr_lines).splitlines() + + +def cleanup(silent=False): + for f in glob.glob("{}/test_list*".format(TEMP)): + if not silent: + log("Deleting {}".format(f)) + os.unlink(f) + + +def datagen(file_name, size): + non_sparse_size = size // 2 + sparse_size = size - non_sparse_size + with open(file_name, "wb") as f: + f.seek(sparse_size) + f.write(os.urandom(non_sparse_size)) + + +def generate_files(): + # file format ~ test_list - f .lz4 ~ + # Generate LZ4Frames + for i in SIZES: + filename = "{}/test_list_{}M".format(TEMP, i) + log("Generating {}".format(filename)) + datagen(filename, i * MIB) + for j in ["--content-size", "-BI", "-BD", "-BX", "--no-frame-crc"]: + lz4file = "{}-lz4f-1f{}.lz4".format(filename, j) + execute("{} {} {} {}".format(LZ4, j, filename, lz4file)) + # Generate skippable frames + lz4file = "{}-skip-1f.lz4".format(filename) + skipsize = i * 1024 + skipbytes = bytes([80, 42, 77, 24]) + 
skipsize.to_bytes(4, byteorder='little', signed=False) + with open(lz4file, 'wb') as f: + f.write(skipbytes) + f.write(os.urandom(skipsize)) + # Generate legacy frames + lz4file = "{}-legc-1f.lz4".format(filename) + execute("{} -l {} {}".format(LZ4, filename, lz4file)) + + # Concatenate --content-size files + file_list = glob.glob("{}/test_list_*-lz4f-1f--content-size.lz4".format(TEMP)) + with open("{}/test_list_{}M-lz4f-2f--content-size.lz4".format(TEMP, sum(SIZES)), 'ab') as outfile: + for fname in file_list: + with open(fname, 'rb') as infile: + outfile.write(infile.read()) + + # Concatenate all files + file_list = glob.glob("{}/test_list_*.lz4".format(TEMP)) + with open("{}/test_list_concat-all.lz4".format(TEMP), 'ab') as outfile: + for fname in file_list: + with open(fname, 'rb') as infile: + outfile.write(infile.read()) + + +if __name__ == '__main__': + cleanup() + generate_files() + unittest.main(verbosity=2, exit=False) + cleanup(silent=True) diff --git a/tests/test-lz4-speed.py b/tests/test-lz4-speed.py new file mode 100644 index 0000000..ca8f010 --- /dev/null +++ b/tests/test-lz4-speed.py @@ -0,0 +1,351 @@ +#! /usr/bin/env python3 + +# +# Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. 
+# + +# Limitations: +# - doesn't support filenames with spaces +# - dir1/lz4 and dir2/lz4 will be merged in a single results file + +import argparse +import os +import string +import subprocess +import time +import traceback +import hashlib + +script_version = 'v1.7.2 (2016-11-08)' +default_repo_url = 'https://github.com/lz4/lz4.git' +working_dir_name = 'speedTest' +working_path = os.getcwd() + '/' + working_dir_name # /path/to/lz4/tests/speedTest +clone_path = working_path + '/' + 'lz4' # /path/to/lz4/tests/speedTest/lz4 +email_header = 'lz4_speedTest' +pid = str(os.getpid()) +verbose = False +clang_version = "unknown" +gcc_version = "unknown" +args = None + + +def hashfile(hasher, fname, blocksize=65536): + with open(fname, "rb") as f: + for chunk in iter(lambda: f.read(blocksize), b""): + hasher.update(chunk) + return hasher.hexdigest() + + +def log(text): + print(time.strftime("%Y/%m/%d %H:%M:%S") + ' - ' + text) + + +def execute(command, print_command=True, print_output=False, print_error=True, param_shell=True): + if print_command: + log("> " + command) + popen = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=param_shell, cwd=execute.cwd) + stdout_lines, stderr_lines = popen.communicate(timeout=args.timeout) + stderr_lines = stderr_lines.decode("utf-8") + stdout_lines = stdout_lines.decode("utf-8") + if print_output: + if stdout_lines: + print(stdout_lines) + if stderr_lines: + print(stderr_lines) + if popen.returncode is not None and popen.returncode != 0: + if stderr_lines and not print_output and print_error: + print(stderr_lines) + raise RuntimeError(stdout_lines + stderr_lines) + return (stdout_lines + stderr_lines).splitlines() +execute.cwd = None + + +def does_command_exist(command): + try: + execute(command, verbose, False, False) + except Exception: + return False + return True + + +def send_email(emails, topic, text, have_mutt, have_mail): + logFileName = working_path + '/' + 'tmpEmailContent' + with 
open(logFileName, "w") as myfile: + myfile.writelines(text) + myfile.close() + if have_mutt: + execute('mutt -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) + elif have_mail: + execute('mail -s "' + topic + '" ' + emails + ' < ' + logFileName, verbose) + else: + log("e-mail cannot be sent (mail or mutt not found)") + + +def send_email_with_attachments(branch, commit, last_commit, args, text, results_files, + logFileName, have_mutt, have_mail): + with open(logFileName, "w") as myfile: + myfile.writelines(text) + myfile.close() + email_topic = '[%s:%s] Warning for %s:%s last_commit=%s speed<%s ratio<%s' \ + % (email_header, pid, branch, commit, last_commit, + args.lowerLimit, args.ratioLimit) + if have_mutt: + execute('mutt -s "' + email_topic + '" ' + args.emails + ' -a ' + results_files + + ' < ' + logFileName) + elif have_mail: + execute('mail -s "' + email_topic + '" ' + args.emails + ' < ' + logFileName) + else: + log("e-mail cannot be sent (mail or mutt not found)") + + +def git_get_branches(): + execute('git fetch -p', verbose) + branches = execute('git branch -rl', verbose) + output = [] + for line in branches: + if ("HEAD" not in line) and ("coverity_scan" not in line) and ("gh-pages" not in line): + output.append(line.strip()) + return output + + +def git_get_changes(branch, commit, last_commit): + fmt = '--format="%h: (%an) %s, %ar"' + if last_commit is None: + commits = execute('git log -n 10 %s %s' % (fmt, commit)) + else: + commits = execute('git --no-pager log %s %s..%s' % (fmt, last_commit, commit)) + return str('Changes in %s since %s:\n' % (branch, last_commit)) + '\n'.join(commits) + + +def get_last_results(resultsFileName): + if not os.path.isfile(resultsFileName): + return None, None, None, None + commit = None + csize = [] + cspeed = [] + dspeed = [] + with open(resultsFileName, 'r') as f: + for line in f: + words = line.split() + if len(words) <= 4: # branch + commit + compilerVer + md5 + commit = words[1] + csize = [] + cspeed = 
[] + dspeed = [] + if (len(words) == 8) or (len(words) == 9): # results: "filename" or "XX files" + csize.append(int(words[1])) + cspeed.append(float(words[3])) + dspeed.append(float(words[5])) + return commit, csize, cspeed, dspeed + + +def benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, + testFilePath, fileName, last_csize, last_cspeed, last_dspeed): + sleepTime = 30 + while os.getloadavg()[0] > args.maxLoadAvg: + log("WARNING: bench loadavg=%.2f is higher than %s, sleeping for %s seconds" + % (os.getloadavg()[0], args.maxLoadAvg, sleepTime)) + time.sleep(sleepTime) + start_load = str(os.getloadavg()) + result = execute('programs/%s -rqi5b1e%s %s' % (executableName, args.lastCLevel, testFilePath), print_output=True) + end_load = str(os.getloadavg()) + linesExpected = args.lastCLevel + 1 + if len(result) != linesExpected: + raise RuntimeError("ERROR: number of result lines=%d is different that expected %d\n%s" % (len(result), linesExpected, '\n'.join(result))) + with open(resultsFileName, "a") as myfile: + myfile.write('%s %s %s md5=%s\n' % (branch, commit, compilerVersion, md5sum)) + myfile.write('\n'.join(result) + '\n') + myfile.close() + if (last_cspeed == None): + log("WARNING: No data for comparison for branch=%s file=%s " % (branch, fileName)) + return "" + commit, csize, cspeed, dspeed = get_last_results(resultsFileName) + text = "" + for i in range(0, min(len(cspeed), len(last_cspeed))): + print("%s:%s -%d cSpeed=%6.2f cLast=%6.2f cDiff=%1.4f dSpeed=%6.2f dLast=%6.2f dDiff=%1.4f ratioDiff=%1.4f %s" % (branch, commit, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], float(last_csize[i])/csize[i], fileName)) + if (cspeed[i]/last_cspeed[i] < args.lowerLimit): + text += "WARNING: %s -%d cSpeed=%.2f cLast=%.2f cDiff=%.4f %s\n" % (executableName, i+1, cspeed[i], last_cspeed[i], cspeed[i]/last_cspeed[i], fileName) + if 
(dspeed[i]/last_dspeed[i] < args.lowerLimit): + text += "WARNING: %s -%d dSpeed=%.2f dLast=%.2f dDiff=%.4f %s\n" % (executableName, i+1, dspeed[i], last_dspeed[i], dspeed[i]/last_dspeed[i], fileName) + if (float(last_csize[i])/csize[i] < args.ratioLimit): + text += "WARNING: %s -%d cSize=%d last_cSize=%d diff=%.4f %s\n" % (executableName, i+1, csize[i], last_csize[i], float(last_csize[i])/csize[i], fileName) + if text: + text = args.message + ("\nmaxLoadAvg=%s load average at start=%s end=%s\n%s last_commit=%s md5=%s\n" % (args.maxLoadAvg, start_load, end_load, compilerVersion, last_commit, md5sum)) + text + return text + + +def update_config_file(branch, commit): + last_commit = None + commitFileName = working_path + "/commit_" + branch.replace("/", "_") + ".txt" + if os.path.isfile(commitFileName): + with open(commitFileName, 'r') as infile: + last_commit = infile.read() + with open(commitFileName, 'w') as outfile: + outfile.write(commit) + return last_commit + + +def double_check(branch, commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName): + last_commit, csize, cspeed, dspeed = get_last_results(resultsFileName) + if not args.dry_run: + text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed) + if text: + log("WARNING: redoing tests for branch %s: commit %s" % (branch, commit)) + text = benchmark_and_compare(branch, commit, last_commit, args, executableName, md5sum, compilerVersion, resultsFileName, filePath, fileName, csize, cspeed, dspeed) + return text + + +def test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail): + local_branch = branch.split('/')[1] + version = local_branch.rpartition('-')[2] + '_' + commit + if not args.dry_run: + execute('make -C programs clean lz4 CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion -DLZ4_GIT_COMMIT=%s" && ' % version + + 'mv 
programs/lz4 programs/lz4_clang && ' + + 'make -C programs clean lz4 lz4c32 MOREFLAGS="-DLZ4_GIT_COMMIT=%s"' % version) + md5_lz4 = hashfile(hashlib.md5(), clone_path + '/programs/lz4') + md5_lz4c32 = hashfile(hashlib.md5(), clone_path + '/programs/lz4c32') + md5_lz4_clang = hashfile(hashlib.md5(), clone_path + '/programs/lz4_clang') + print("md5(lz4)=%s\nmd5(lz4c32)=%s\nmd5(lz4_clang)=%s" % (md5_lz4, md5_lz4c32, md5_lz4_clang)) + print("gcc_version=%s clang_version=%s" % (gcc_version, clang_version)) + + logFileName = working_path + "/log_" + branch.replace("/", "_") + ".txt" + text_to_send = [] + results_files = "" + + for filePath in testFilePaths: + fileName = filePath.rpartition('/')[2] + resultsFileName = working_path + "/results_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'lz4', md5_lz4, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + resultsFileName = working_path + "/results32_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'lz4c32', md5_lz4c32, 'gcc_version='+gcc_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + resultsFileName = working_path + "/resultsClang_" + branch.replace("/", "_") + "_" + fileName.replace(".", "_") + ".txt" + text = double_check(branch, commit, args, 'lz4_clang', md5_lz4_clang, 'clang_version='+clang_version, resultsFileName, filePath, fileName) + if text: + text_to_send.append(text) + results_files += resultsFileName + " " + if text_to_send: + send_email_with_attachments(branch, commit, last_commit, args, text_to_send, results_files, logFileName, have_mutt, have_mail) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('testFileNames', help='file or directory names list for speed 
benchmark') + parser.add_argument('emails', help='list of e-mail addresses to send warnings') + parser.add_argument('--message', '-m', help='attach an additional message to e-mail', default="") + parser.add_argument('--repoURL', help='changes default repository URL', default=default_repo_url) + parser.add_argument('--lowerLimit', '-l', type=float, help='send email if speed is lower than given limit', default=0.98) + parser.add_argument('--ratioLimit', '-r', type=float, help='send email if ratio is lower than given limit', default=0.999) + parser.add_argument('--maxLoadAvg', type=float, help='maximum load average to start testing', default=0.75) + parser.add_argument('--lastCLevel', type=int, help='last compression level for testing', default=5) + parser.add_argument('--sleepTime', '-s', type=int, help='frequency of repository checking in seconds', default=300) + parser.add_argument('--timeout', '-t', type=int, help='timeout for executing shell commands', default=1800) + parser.add_argument('--dry-run', dest='dry_run', action='store_true', help='not build', default=False) + parser.add_argument('--verbose', '-v', action='store_true', help='more verbose logs', default=False) + args = parser.parse_args() + verbose = args.verbose + + # check if test files are accessible + testFileNames = args.testFileNames.split() + testFilePaths = [] + for fileName in testFileNames: + fileName = os.path.expanduser(fileName) + if os.path.isfile(fileName) or os.path.isdir(fileName): + testFilePaths.append(os.path.abspath(fileName)) + else: + log("ERROR: File/directory not found: " + fileName) + exit(1) + + # check availability of e-mail senders + have_mutt = does_command_exist("mutt -h") + have_mail = does_command_exist("mail -V") + if not have_mutt and not have_mail: + log("ERROR: e-mail senders 'mail' or 'mutt' not found") + exit(1) + + clang_version = execute("clang -v 2>&1 | grep 'clang version' | sed -e 's:.*version \\([0-9.]*\\).*:\\1:' -e 's:\\.\\([0-9][0-9]\\):\\1:g'", 
verbose)[0]; + gcc_version = execute("gcc -dumpversion", verbose)[0]; + + if verbose: + print("PARAMETERS:\nrepoURL=%s" % args.repoURL) + print("working_path=%s" % working_path) + print("clone_path=%s" % clone_path) + print("testFilePath(%s)=%s" % (len(testFilePaths), testFilePaths)) + print("message=%s" % args.message) + print("emails=%s" % args.emails) + print("maxLoadAvg=%s" % args.maxLoadAvg) + print("lowerLimit=%s" % args.lowerLimit) + print("ratioLimit=%s" % args.ratioLimit) + print("lastCLevel=%s" % args.lastCLevel) + print("sleepTime=%s" % args.sleepTime) + print("timeout=%s" % args.timeout) + print("dry_run=%s" % args.dry_run) + print("verbose=%s" % args.verbose) + print("have_mutt=%s have_mail=%s" % (have_mutt, have_mail)) + + # clone lz4 repo if needed + if not os.path.isdir(working_path): + os.mkdir(working_path) + if not os.path.isdir(clone_path): + execute.cwd = working_path + execute('git clone ' + args.repoURL) + if not os.path.isdir(clone_path): + log("ERROR: lz4 clone not found: " + clone_path) + exit(1) + execute.cwd = clone_path + + # check if speedTest.pid already exists + pidfile = "./speedTest.pid" + if os.path.isfile(pidfile): + log("ERROR: %s already exists, exiting" % pidfile) + exit(1) + + send_email(args.emails, '[%s:%s] test-lz4-speed.py %s has been started' % (email_header, pid, script_version), args.message, have_mutt, have_mail) + with open(pidfile, 'w') as the_file: + the_file.write(pid) + + branch = "" + commit = "" + first_time = True + while True: + try: + if first_time: + first_time = False + else: + if verbose: + log("sleep for %s seconds" % args.sleepTime) + time.sleep(args.sleepTime) + loadavg = os.getloadavg()[0] + if (loadavg <= args.maxLoadAvg): + branches = git_get_branches() + for branch in branches: + commit = execute('git show -s --format=%h ' + branch, verbose)[0] + last_commit = update_config_file(branch, commit) + if commit == last_commit: + log("skipping branch %s: head %s already processed" % (branch, commit)) + 
else: + log("build branch %s: head %s is different from prev %s" % (branch, commit, last_commit)) + execute('git checkout -- . && git checkout ' + branch) + print(git_get_changes(branch, commit, last_commit)) + test_commit(branch, commit, last_commit, args, testFilePaths, have_mutt, have_mail) + else: + log("WARNING: main loadavg=%.2f is higher than %s" % (loadavg, args.maxLoadAvg)) + except Exception as e: + stack = traceback.format_exc() + email_topic = '[%s:%s] ERROR in %s:%s' % (email_header, pid, branch, commit) + send_email(args.emails, email_topic, stack, have_mutt, have_mail) + print(stack) + except KeyboardInterrupt: + os.unlink(pidfile) + send_email(args.emails, '[%s:%s] test-lz4-speed.py %s has been stopped' % (email_header, pid, script_version), args.message, have_mutt, have_mail) + exit(0) diff --git a/tests/test-lz4-versions.py b/tests/test-lz4-versions.py new file mode 100644 index 0000000..d7fd199 --- /dev/null +++ b/tests/test-lz4-versions.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python3 +"""Test LZ4 interoperability between versions""" + +# +# Copyright (C) 2011-present, Takayuki Matsuoka +# All rights reserved. 
+# GPL v2 License +# + +import glob +import subprocess +import filecmp +import os +import shutil +import sys +import hashlib + +repo_url = 'https://github.com/lz4/lz4.git' +tmp_dir_name = 'tests/versionsTest' +make_cmd = 'make' +git_cmd = 'git' +test_dat_src = 'README.md' +test_dat = 'test_dat' +head = 'v999' + +def proc(cmd_args, pipe=True, dummy=False): + if dummy: + return + if pipe: + subproc = subprocess.Popen(cmd_args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + else: + subproc = subprocess.Popen(cmd_args) + return subproc.communicate() + +def make(args, pipe=True): + return proc([make_cmd] + args, pipe) + +def git(args, pipe=True): + return proc([git_cmd] + args, pipe) + +def get_git_tags(): + stdout, stderr = git(['tag', '-l', 'r[0-9][0-9][0-9]']) + tags = stdout.decode('utf-8').split() + stdout, stderr = git(['tag', '-l', 'v[1-9].[0-9].[0-9]']) + tags += stdout.decode('utf-8').split() + return tags + +# https://stackoverflow.com/a/19711609/2132223 +def sha1_of_file(filepath): + with open(filepath, 'rb') as f: + return hashlib.sha1(f.read()).hexdigest() + +if __name__ == '__main__': + error_code = 0 + base_dir = os.getcwd() + '/..' # /path/to/lz4 + tmp_dir = base_dir + '/' + tmp_dir_name # /path/to/lz4/tests/versionsTest + clone_dir = tmp_dir + '/' + 'lz4' # /path/to/lz4/tests/versionsTest/lz4 + programs_dir = base_dir + '/programs' # /path/to/lz4/programs + os.makedirs(tmp_dir, exist_ok=True) + + # since Travis clones limited depth, we should clone full repository + if not os.path.isdir(clone_dir): + git(['clone', repo_url, clone_dir]) + + shutil.copy2(base_dir + '/' + test_dat_src, tmp_dir + '/' + test_dat) + + # Retrieve all release tags + print('Retrieve all release tags :') + os.chdir(clone_dir) + tags = [head] + get_git_tags() + print(tags); + + # Build all release lz4c and lz4c32 + for tag in tags: + os.chdir(base_dir) + dst_lz4c = '{}/lz4c.{}' .format(tmp_dir, tag) # /path/to/lz4/test/lz4test/lz4c. 
+ dst_lz4c32 = '{}/lz4c32.{}'.format(tmp_dir, tag) # /path/to/lz4/test/lz4test/lz4c32. + if not os.path.isfile(dst_lz4c) or not os.path.isfile(dst_lz4c32) or tag == head: + if tag != head: + r_dir = '{}/{}'.format(tmp_dir, tag) # /path/to/lz4/test/lz4test/ + os.makedirs(r_dir, exist_ok=True) + os.chdir(clone_dir) + git(['--work-tree=' + r_dir, 'checkout', tag, '--', '.'], False) + os.chdir(r_dir + '/programs') # /path/to/lz4/lz4test/ /programs + else: + os.chdir(programs_dir) + make(['clean', 'lz4c'], False) + shutil.copy2('lz4c', dst_lz4c) + make(['clean', 'lz4c32'], False) + shutil.copy2('lz4c32', dst_lz4c32) + + # Compress test.dat by all released lz4c and lz4c32 + print('Compress test.dat by all released lz4c and lz4c32') + os.chdir(tmp_dir) + for lz4 in glob.glob("*.lz4"): + os.remove(lz4) + for tag in tags: + proc(['./lz4c.' + tag, '-1fz', test_dat, test_dat + '_1_64_' + tag + '.lz4']) + proc(['./lz4c.' + tag, '-9fz', test_dat, test_dat + '_9_64_' + tag + '.lz4']) + proc(['./lz4c32.' + tag, '-1fz', test_dat, test_dat + '_1_32_' + tag + '.lz4']) + proc(['./lz4c32.' 
+ tag, '-9fz', test_dat, test_dat + '_9_32_' + tag + '.lz4']) + + print('Full list of compressed files') + lz4s = sorted(glob.glob('*.lz4')) + for lz4 in lz4s: + print(lz4 + ' : ' + repr(os.path.getsize(lz4))) + + # Remove duplicated .lz4 files + print('') + print('Duplicated files') + lz4s = sorted(glob.glob('*.lz4')) + for i, lz4 in enumerate(lz4s): + if not os.path.isfile(lz4): + continue + for j in range(i+1, len(lz4s)): + lz4t = lz4s[j] + if not os.path.isfile(lz4t): + continue + if filecmp.cmp(lz4, lz4t): + os.remove(lz4t) + print('{} == {}'.format(lz4, lz4t)) + + print('Enumerate only different compressed files') + lz4s = sorted(glob.glob('*.lz4')) + for lz4 in lz4s: + print(lz4 + ' : ' + repr(os.path.getsize(lz4)) + ', ' + sha1_of_file(lz4)) + + # Decompress remained .lz4 files by all released lz4c and lz4c32 + print('Decompression tests and verifications') + lz4s = sorted(glob.glob('*.lz4')) + for dec in glob.glob("*.dec"): + os.remove(dec) + for lz4 in lz4s: + print(lz4, end=" ") + for tag in tags: + print(tag, end=" ") + proc(['./lz4c.' + tag, '-df', lz4, lz4 + '_d64_' + tag + '.dec']) + proc(['./lz4c32.' 
+ tag, '-df', lz4, lz4 + '_d32_' + tag + '.dec']) + print(' OK') # well, here, decompression has worked; but file is not yet verified + + # Compare all '.dec' files with test_dat + decs = glob.glob('*.dec') + for dec in decs: + if not filecmp.cmp(dec, test_dat): + print('ERR : ' + dec) + error_code = 1 + else: + print('OK : ' + dec) + os.remove(dec) + + if error_code != 0: + print('ERROR') + + sys.exit(error_code) diff --git a/tests/test_custom_block_sizes.sh b/tests/test_custom_block_sizes.sh new file mode 100644 index 0000000..aba6733 --- /dev/null +++ b/tests/test_custom_block_sizes.sh @@ -0,0 +1,72 @@ +#/usr/bin/env sh +set -e + +LZ4=../lz4 +CHECKFRAME=./checkFrame +DATAGEN=./datagen + +failures="" + +TMPFILE=/tmp/test_custom_block_sizes.$$ +TMPFILE1=/tmp/test_custom_block_sizes1.$$ +TMPFILE2=/tmp/test_custom_block_sizes2.$$ +$DATAGEN -g12345678 > $TMPFILE1 +$DATAGEN -g12345678 > $TMPFILE2 + +echo Testing -B31 +$LZ4 -f -B31 $TMPFILE1 && failures="31 (should fail) " + +for blocksize in 32 65535 65536 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b4 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 65537 262143 262144 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b5 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 262145 1048575 1048576 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B$blocksize -b6 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 1048577 4194303 4194304 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + 
$CHECKFRAME -B$blocksize -b7 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +for blocksize in 4194305 10485760 +do + echo Testing -B$blocksize + $LZ4 -f -B$blocksize $TMPFILE1 + $LZ4 -f -B$blocksize $TMPFILE2 + cat $TMPFILE1.lz4 $TMPFILE2.lz4 > $TMPFILE.lz4 + $CHECKFRAME -B4194304 -b7 $TMPFILE.lz4 || failures="$failures $blocksize " +done + +rm $TMPFILE.lz4 $TMPFILE1 $TMPFILE1.lz4 $TMPFILE2 $TMPFILE2.lz4 +if [ "$failures" == "" ] +then + echo ---- All tests passed + exit 0 +else + echo ---- The following tests had failures: $failures + exit 1 +fi diff --git a/tests/test_install.sh b/tests/test_install.sh new file mode 100644 index 0000000..122bac5 --- /dev/null +++ b/tests/test_install.sh @@ -0,0 +1,28 @@ +#/usr/bin/env sh +set -e + + +make="make -C $lz4_root" +unamestr=$(uname) +if [ "$unamestr" = 'Linux' ]; then + make="make -C $lz4_root" +elif [ "$unamestr" = 'FreeBSD' -o "$unamestr" = 'OpenBSD' ]; then + make="gmake -C $lz4_root" +fi + +for cmd in install uninstall; do + for upper in DUMMY PREFIX EXEC_PREFIX LIBDIR INCLUDEDIR PKGCONFIGDIR BINDIR MANDIR MAN1DIR ; do + lower=$(echo $upper | tr '[:upper:]' '[:lower:]') + tmp_lower="$(pwd)/tmp-lower-$lower/" + tmp_upper="$(pwd)/tmp-upper-$lower/" + echo $make $cmd DESTDIR="$tmp_upper" $upper="test" + $make $cmd DESTDIR="$tmp_upper" $upper="test" >/dev/null + echo $make $cmd DESTDIR="$tmp_lower" $lower="test" + $make $cmd DESTDIR="$tmp_lower" $lower="test" >/dev/null + command diff -r "$tmp_lower" "$tmp_upper" && echo "SAME!" || false + if [ "x$cmd" = "xuninstall" ]; then + test -z "$(find "$tmp_lower" -type f)" && echo "EMPTY!" 
|| false + rm -rf "$tmp_upper" "$tmp_lower" + fi + done +done diff --git a/tmp b/tmp new file mode 100644 index 0000000..c97c12f Binary files /dev/null and b/tmp differ diff --git a/tmpsparse b/tmpsparse new file mode 100644 index 0000000..c97c12f Binary files /dev/null and b/tmpsparse differ diff --git a/visual/.gitignore b/visual/.gitignore deleted file mode 100644 index 276f8f5..0000000 --- a/visual/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -# Visual C++ -.vs/ -*Copy -*.db -*.opensdf -*.sdf -*.suo -*.user -ver*/ -VS2010/bin/