diff --git a/config-user.mk.acr b/config-user.mk.acr index 012f08c610..1285630f41 100644 --- a/config-user.mk.acr +++ b/config-user.mk.acr @@ -102,6 +102,7 @@ LIBZIP=@LIBZIP@ LIBXXHASH=@LIBXXHASH@ USE_SYSLZ4=@USE_SYSLZ4@ +USE_SMALLZ4=@USE_SMALLZ4@ ifeq ($(HAVE_LIB_SSL),1) SSL_CFLAGS=@SSL_CFLAGS@ diff --git a/configure b/configure index 74834ad397..f332318f5a 100755 --- a/configure +++ b/configure @@ -35,6 +35,7 @@ USE_CS5=0 USE_CS4=0 WITH_CAPSTONE=0 WITH_SYSLZ4=0 +WITH_SMALLZ4=0 USE_ZIP=0 USE_XXHASH=0 WITH_GPL=1 @@ -188,8 +189,7 @@ System types: --target=TARGET configure for building compilers for TARGET [HOST] EOF2 -printf " -Optional Features: +printf "\nOptional Features: --disable-debugger disable native debugger features --with-sysmagic force to use system's magic --disable-threads disable use of thread apis @@ -207,6 +207,7 @@ Optional Features: --with-capstone4 build v4 branch of capstone --with-syscapstone force to use system-wide capstone --with-syslz4 force to use system's liblz4 + --with-smallz4 build with smallz4 use it as the lz4 decompressor --with-syszip force to use system's libzip and zlib --with-sysxxhash force to use system's xxhash --without-gpl do not build GPL code (grub, cxx, ... ) @@ -218,20 +219,16 @@ Optional Features: --with-ostype Choose OS ( android windows wsl mingw32 bsd solaris gnulinux darwin haiku ) (USEROSTYPE=auto) --with-libversion specify different libversion (LIBVERSION=xxx) --without-jemalloc build without jemalloc - --with-checks-level value between 0 and 3 to enable different level of assert (see R_CHECKS_LEVEL) (R_CHECKS_LEVEL=2) -" -printf " -Some influential environment variables: + --with-checks-level value between 0 and 3 to enable different level of assert (see R_CHECKS_LEVEL) (R_CHECKS_LEVEL=2)\n" +printf "\nSome influential environment variables: CC C compiler command CFLAGS C compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory CPPFLAGS C/C++ preprocessor flags, e.g. 
-I if you have headers in a nonstandard directory - CPP C preprocessor -" -printf " -Report bugs to: pancake " + CPP C preprocessor\n" +printf "\nReport bugs to: pancake " echo "" exit 0 } @@ -295,7 +292,7 @@ echo "LANGS: c" echo "REQUIRED: libdl" echo "OPTIONAL: libmagic libz libzip libxxhash libssl liblibuv>=1.0.0" echo "PKGCONFIG: capstone liblz4 openssl libuv" -echo "FLAGS: --disable-debugger --with-sysmagic --disable-threads --disable-loadlibs --enable-threadsafety --without-dylink --without-fork --without-ptrace-wrap --without-gperf --without-capstone --with-new-io-cache --with-libr --with-static-themes --with-capstone5 --with-capstone4 --with-syscapstone --with-syslz4 --with-syszip --with-sysxxhash --without-gpl --with-ssl --with-ssl-crypto --with-libuv --with-rpath --with-compiler=gcc --with-ostype=auto --with-libversion=xxx --without-jemalloc --with-checks-level=2" +echo "FLAGS: --disable-debugger --with-sysmagic --disable-threads --disable-loadlibs --enable-threadsafety --without-dylink --without-fork --without-ptrace-wrap --without-gperf --without-capstone --with-new-io-cache --with-libr --with-static-themes --with-capstone5 --with-capstone4 --with-syscapstone --with-syslz4 --with-smallz4 --with-syszip --with-sysxxhash --without-gpl --with-ssl --with-ssl-crypto --with-libuv --with-rpath --with-compiler=gcc --with-ostype=auto --with-libversion=xxx --without-jemalloc --with-checks-level=2" exit 0 ;; --cache-file) @@ -364,6 +361,7 @@ echo "FLAGS: --disable-debugger --with-sysmagic --disable-threads --disabl "--with-capstone4") USE_CS4="1"; ;; "--with-syscapstone") WITH_CAPSTONE="1"; ;; "--with-syslz4") WITH_SYSLZ4="1"; ;; +"--with-smallz4") WITH_SMALLZ4="1"; ;; "--with-syszip") USE_ZIP="1"; ;; "--with-sysxxhash") USE_XXHASH="1"; ;; "--without-gpl") WITH_GPL="0"; ;; @@ -393,7 +391,7 @@ parse_options "$1" shift done -ENVWORDS="MANDIR DESCRIPTION INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR ETCDIR SYSCONFDIR DATADIR DOCDIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX 
SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_PROGRAM_STRIP INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU VERSION VERSION_MAJOR VERSION_MINOR VERSION_PATCH VERSION_NUMBER PKGCFG_LIBDIR PKGCFG_INCDIR PKGNAME VPATH CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS CPPFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS PKGCONFIG HAVE_PATCH PATCH HAVE_AR AR HAVE_GIT GIT HAVE_GPERF GPERF HAVE_LIB_MAGIC HAVE_LINUX_CAN_H USE_MAGIC USE_LIB_MAGIC LIBMAGIC WANT_THREADS LOADLIBS R_CRITICAL_ENABLED WANT_DYLINK HAVE_FORK WANT_PTRACE_WRAP WANT_GPERF WANT_CAPSTONE NEW_IO_CACHE WITH_LIBR WITH_STATIC_THEMES USE_CS5 USE_CS4 WITH_CAPSTONE CAPSTONE_CFLAGS CAPSTONE_LDFLAGS HAVE_PKGCFG_CAPSTONE USE_CAPSTONE LZ4_CFLAGS LZ4_LDFLAGS HAVE_PKGCFG_LIBLZ4 WITH_SYSLZ4 USE_SYSLZ4 HAVE_LIB_Z HAVE_LIB_ZIP USE_ZIP USE_LIB_ZIP LIBZIP HAVE_LIB_XXHASH USE_XXHASH USE_LIB_XXHASH LIBXXHASH WITH_GPL HAVE_DECL_ADDR_NO_RANDOMIZE HAVE_DECL___GLIBC__ HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_CLOCK_NANOSLEEP HAVE_SIGACTION HAVE_CLOCK_GETTIME CLOCK_LDFLAGS SUPPORT_GNU99 HAVE_LIB_GMP HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL WANT_SSL WANT_SSL_CRYPTO WANT_LIBUV HAVE_LIBUV_VERSION_1_0_0 LIBUV_CFLAGS LIBUV_LDFLAGS HAVE_PKGCFG_LIBUV HAVE_LIBUV USE_RPATH USERCC USEROSTYPE LIBVERSION HAVE_JEMALLOC HAVE_PTRACE USE_PTRACE_WRAP R_CHECKS_LEVEL" +ENVWORDS="MANDIR DESCRIPTION INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR ETCDIR SYSCONFDIR DATADIR DOCDIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_PROGRAM_STRIP INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU VERSION VERSION_MAJOR VERSION_MINOR VERSION_PATCH VERSION_NUMBER PKGCFG_LIBDIR PKGCFG_INCDIR PKGNAME VPATH CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS CPPFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS 
PKGCONFIG HAVE_PATCH PATCH HAVE_AR AR HAVE_GIT GIT HAVE_GPERF GPERF HAVE_LIB_MAGIC HAVE_LINUX_CAN_H USE_MAGIC USE_LIB_MAGIC LIBMAGIC WANT_THREADS LOADLIBS R_CRITICAL_ENABLED WANT_DYLINK HAVE_FORK WANT_PTRACE_WRAP WANT_GPERF WANT_CAPSTONE NEW_IO_CACHE WITH_LIBR WITH_STATIC_THEMES USE_CS5 USE_CS4 WITH_CAPSTONE CAPSTONE_CFLAGS CAPSTONE_LDFLAGS HAVE_PKGCFG_CAPSTONE USE_CAPSTONE LZ4_CFLAGS LZ4_LDFLAGS HAVE_PKGCFG_LIBLZ4 WITH_SYSLZ4 USE_SYSLZ4 WITH_SMALLZ4 USE_SMALLZ4 HAVE_LIB_Z HAVE_LIB_ZIP USE_ZIP USE_LIB_ZIP LIBZIP HAVE_LIB_XXHASH USE_XXHASH USE_LIB_XXHASH LIBXXHASH WITH_GPL HAVE_DECL_ADDR_NO_RANDOMIZE HAVE_DECL___GLIBC__ HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_CLOCK_NANOSLEEP HAVE_SIGACTION HAVE_CLOCK_GETTIME CLOCK_LDFLAGS SUPPORT_GNU99 HAVE_LIB_GMP HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL WANT_SSL WANT_SSL_CRYPTO WANT_LIBUV HAVE_LIBUV_VERSION_1_0_0 LIBUV_CFLAGS LIBUV_LDFLAGS HAVE_PKGCFG_LIBUV HAVE_LIBUV USE_RPATH USERCC USEROSTYPE LIBVERSION HAVE_JEMALLOC HAVE_PTRACE USE_PTRACE_WRAP R_CHECKS_LEVEL" create_environ @@ -665,6 +663,10 @@ if [ 11 = "$WITH_SYSLZ4$LZ4_CFLAGS" ]; then USE_SYSLZ4="1" else USE_SYSLZ4="0"; fi +if [ "$WITH_SMALLZ4" = "1" ]; then +USE_SMALLZ4="1" +else +USE_SMALLZ4="0"; fi check_library HAVE_LIB_Z z 0 check_library HAVE_LIB_ZIP zip 0 if [ 11 = "$HAVE_LIB_Z$USE_ZIP" ]; then @@ -979,7 +981,7 @@ do_remove if [ "$QUIET" = 0 ]; then echo echo "Final report:" -for A in BUILD CC CFLAGS DEBUGGER HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_FORK HAVE_GPERF HAVE_LIBUV HAVE_LIB_GMP HAVE_OPENSSL WANT_SSL_CRYPTO HAVE_PTRACE HOST LDFLAGS LIBVERSION PKGCONFIG PREFIX R_CHECKS_LEVEL TARGET USERCC USEROSTYPE USE_CAPSTONE USE_LIB_MAGIC NEW_IO_CACHE USE_LIB_XXHASH USE_LIB_ZIP USE_PTRACE_WRAP USE_SYSLZ4 VERSION WANT_DYLINK ; do +for A in BUILD CC CFLAGS DEBUGGER HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_FORK HAVE_GPERF HAVE_LIBUV HAVE_LIB_GMP HAVE_OPENSSL 
#if USE_SMALLZ4
// NOTE(review): this relative path only resolves through one of the -I directories
// of libr/util (it climbs three levels from the including directory) - confirm.
#include "../../../shlr/smallz4/smallz4cat.h"

/*
 * In-memory state handed to the smallz4 callbacks below through their
 * opaque userPtr argument.
 */
struct UserPtr {
	const ut8 *input;   // compressed bytes
	ut64 inputPos;      // index of the next byte smallz4GetByte() returns
	ut64 inputSize;     // number of readable bytes in input; 0 disables the check
	                    // (callers that know the length should set it, e.g. to srcLen)
	ut8 *output;        // destination buffer
	ut64 outputPos;     // number of bytes already stored in output
	ut32 *outputSize;   // points to the current capacity of output
	int error;          // 0 while everything is fine, -1 once a callback failed
};

/*
 * SEND_BYTES callback: appends numBytes bytes from data to user->output.
 *
 * When the bytes do not fit into *user->outputSize, nothing is copied and
 * user->error is set to -1. The error is sticky: once it is set, every later
 * call is ignored, so a failed run never stores fragments behind the point of
 * failure. The caller has to reset error (and outputPos) before retrying.
 */
void smallz4Write(const unsigned char* data, unsigned int numBytes, void *userPtr) {
	struct UserPtr *user = (struct UserPtr *)userPtr;
	if (data == NULL || numBytes == 0 || user->error != 0) {
		return;
	}
	const ut64 capacity = *(user->outputSize);
	// compare against the remaining room without letting the subtraction wrap
	if (user->outputPos > capacity || capacity - user->outputPos < numBytes) {
		user->error = -1;
		return;
	}
	memcpy (user->output + user->outputPos, data, numBytes);
	user->outputPos += numBytes;
}

/*
 * GET_BYTE callback: returns the next input byte and advances inputPos.
 *
 * When inputSize is non-zero, reading at or beyond it sets user->error to -1
 * and yields 0 instead of touching memory behind the input buffer.
 */
unsigned char smallz4GetByte(void *userPtr) {
	struct UserPtr *user = (struct UserPtr *)userPtr;
	if (user->inputSize != 0 && user->inputPos >= user->inputSize) {
		user->error = -1;
		return 0;
	}
	return user->input[user->inputPos++];
}
#endif
a/shlr/smallz4/smallz4cat.c b/shlr/smallz4/smallz4cat.c new file mode 100644 index 0000000000..1101ee93f7 --- /dev/null +++ b/shlr/smallz4/smallz4cat.c @@ -0,0 +1,435 @@ +// ////////////////////////////////////////////////////////// +// smallz4cat.c +// Copyright (c) 2016-2019 Stephan Brumme. All rights reserved. +// see https://create.stephan-brumme.com/smallz4/ +// +// "MIT License": +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, +// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
// This program is a shorter, more readable, albeit slower re-implementation of lz4cat ( https://github.com/lz4/lz4 )

// compile as a command-line tool:
//   gcc smallz4cat.c -O3 -o smallz4cat -Wall -pedantic -std=c99 -s -DSMALLZ4CAT_MAIN
// Without SMALLZ4CAT_MAIN no main() is emitted, so the object can be linked into a library.
// The static 8k binary was compiled using Clang and dietlibc (see https://www.fefe.de/dietlibc/ )

// Limitations:
// - skippable frames and legacy frames are not implemented (and most likely never will)
// - checksums are not verified (see https://create.stephan-brumme.com/xxhash/ for a simple implementation)

// Replace getByteFromIn() and sendBytesToOut() by your own code if you need in-memory LZ4 decompression.
// Corrupted data causes a call to unlz4error() and a negative return value.

// suppress warnings when compiled by Visual C++
#define _CRT_SECURE_NO_WARNINGS

#include <stdio.h>  // stdin/stdout/stderr, fopen, ...
#include <stdlib.h> // kept from the original include list (exit() is no longer called)
#include <string.h> // memcpy

#ifndef FALSE
#define FALSE 0
#define TRUE 1
#endif

/// error handler: prints the message to stderr and returns.
/// It does NOT terminate the process, every caller has to bail out on its own.
static void unlz4error(const char* msg)
{
	// smaller static binary than fprintf(stderr, "ERROR: %s\n", msg);
	fputs ("ERROR: ", stderr);
	fputs (msg, stderr);
	fputc ('\n', stderr);
}


// ==================== I/O INTERFACE ====================


// read one byte from input, see getByteFromIn() for a basic implementation
typedef unsigned char (*GET_BYTE) (void* userPtr);
// write several bytes, see sendBytesToOut() for a basic implementation
typedef void (*SEND_BYTES)(const unsigned char*, unsigned int, void* userPtr);

int unlz4Block_userPtr (GET_BYTE getByte, SEND_BYTES sendBytes, void *userPtr, unsigned int blockSize, unsigned int *position, unsigned char *hist);

// modify input buffer size as you like ... for most use cases, bigger buffer aren't faster anymore - and even reducing to 1 byte works !
#define READ_BUFFER_SIZE (4 * 1024)

/// state of the file based callbacks getByteFromIn() and sendBytesToOut()
struct UserPtr
{
	// file handles
	FILE *in;
	FILE *out;
	unsigned char readBuffer[READ_BUFFER_SIZE];
	unsigned int pos;       // next unread index in readBuffer
	unsigned int available; // number of valid bytes in readBuffer
	int failed;             // set to TRUE after a read past the end of "in" or a short write to "out"
};

/// read a single byte (with simple buffering).
/// At the end of the input it reports "out of data" once, sets user->failed and keeps
/// returning 0, so the read position never runs past the valid part of readBuffer.
static unsigned char getByteFromIn(void* userPtr)
{
	/// cast user-specific data
	struct UserPtr *user = (struct UserPtr *)userPtr;

	// refill buffer
	if (user->pos == user->available) {
		user->pos = 0;
		user->available = (unsigned int)fread (user->readBuffer, 1, READ_BUFFER_SIZE, user->in);
		if (user->available == 0) {
			if (!user->failed) {
				unlz4error ("out of data");
				user->failed = TRUE;
			}
			return 0;
		}
	}

	// return a byte
	return user->readBuffer[user->pos++];
}

/// write a block of bytes, a short write is recorded in user->failed
static void sendBytesToOut(const unsigned char* data, unsigned int numBytes, void* userPtr)
{
	/// cast user-specific data
	struct UserPtr *user = (struct UserPtr *)userPtr;
	if (data != NULL && numBytes > 0) {
		if (fwrite (data, 1, numBytes, user->out) != numBytes) {
			user->failed = TRUE;
		}
	}
}


// ==================== LZ4 DECOMPRESSOR ====================


// don't lower this value, backreferences can be 64kb far away
#define HISTORY_SIZE (64 * 1024)

/// ring buffer with the most recently decoded bytes
struct Lz4Window
{
	unsigned char data[HISTORY_SIZE];
	// next free position in data[], always < HISTORY_SIZE
	unsigned int pos;
	// number of valid bytes preceding pos (wrapping around), at most HISTORY_SIZE
	unsigned int filled;
};

/// append one byte to the window, a completely filled window is passed to sendBytes
static void lz4WindowPut(struct Lz4Window *win, unsigned char byte, SEND_BYTES sendBytes, void *userPtr)
{
	win->data[win->pos++] = byte;
	if (win->filled < HISTORY_SIZE) {
		win->filled++;
	}
	if (win->pos == HISTORY_SIZE) {
		sendBytes (win->data, HISTORY_SIZE, userPtr);
		win->pos = 0;
	}
}

/// decode one compressed block of blockSize input bytes into the window.
/// getByte is called at most blockSize times. Full windows are sent to sendBytes,
/// the rest stays pending in win->data[0..win->pos).
/// Returns 0 and stores the number of decoded bytes in *produced, or returns -1
/// if the block is truncated or refers to data that was never decoded.
static int lz4DecodeBlock(GET_BYTE getByte, SEND_BYTES sendBytes, void *userPtr, unsigned int blockSize, struct Lz4Window *win, unsigned int *produced)
{
	// number of input bytes consumed so far
	unsigned int blockOffset = 0;
	unsigned int numDecoded = 0;
	*produced = 0;

	while (blockOffset < blockSize) {
		// get a token
		unsigned char token = getByte (userPtr);
		blockOffset++;

		// determine number of literals
		unsigned int numLiterals = token >> 4;
		if (numLiterals == 15) {
			// number of literals length encoded in more than 1 byte
			unsigned char current;
			do {
				if (blockOffset >= blockSize) {
					unlz4error ("out of data");
					return -1;
				}
				current = getByte (userPtr);
				numLiterals += current;
				blockOffset++;
			} while (current == 255);
		}

		// the literals have to be part of this block
		if (numLiterals > blockSize - blockOffset) {
			unlz4error ("out of data");
			return -1;
		}
		blockOffset += numLiterals;
		numDecoded += numLiterals;

		// copy all those literals
		while (numLiterals-- > 0) {
			lz4WindowPut (win, getByte (userPtr), sendBytes, userPtr);
		}

		// last token has only literals
		if (blockOffset == blockSize) {
			break;
		}

		// match distance is encoded in two bytes (little endian)
		if (blockSize - blockOffset < 2) {
			unlz4error ("out of data");
			return -1;
		}
		unsigned int delta = getByte (userPtr);
		delta |= (unsigned int)getByte (userPtr) << 8;
		// zero isn't allowed, neither is a reference to bytes that were never decoded
		if (delta == 0 || delta > win->filled) {
			unlz4error ("invalid offset");
			return -1;
		}
		blockOffset += 2;

		// match length (always >= 4, therefore length is stored minus 4)
		unsigned int matchLength = 4 + (token & 0x0F);
		if (matchLength == 4 + 0x0F) {
			// match length encoded in more than 1 byte
			unsigned char current;
			do {
				if (blockOffset >= blockSize) {
					unlz4error ("out of data");
					return -1;
				}
				current = getByte (userPtr);
				matchLength += current;
				blockOffset++;
			} while (current == 255);
		}
		numDecoded += matchLength;

		// copy match
		unsigned int referencePos = (win->pos >= delta)? (win->pos - delta): (HISTORY_SIZE + win->pos - delta);
		// start and end within the current 64k window ? (written so that the sums cannot wrap)
		int contiguous = matchLength < HISTORY_SIZE - win->pos && matchLength < HISTORY_SIZE - referencePos;
		if (contiguous && (win->pos >= referencePos + matchLength || referencePos >= win->pos + matchLength)) {
			// non-overlapping, no wrap-around: fast copy
			memcpy (win->data + win->pos, win->data + referencePos, matchLength);
			win->pos += matchLength;
			win->filled += matchLength;
			if (win->filled > HISTORY_SIZE) {
				win->filled = HISTORY_SIZE;
			}
		} else {
			// overlapping, or read/write wraps around at the end of the window: byte-wise copy
			while (matchLength-- > 0) {
				unsigned char byte = win->data[referencePos];
				referencePos = (referencePos + 1) % HISTORY_SIZE;
				lz4WindowPut (win, byte, sendBytes, userPtr);
			}
		}
	}

	*produced = numDecoded;
	return 0;
}

/// decompress everything in input stream (accessed via getByte) and write to output stream (via sendBytes).
/// dictionary is an optional file name (NULL: none). Returns 0 on success and -1 on error.
int unlz4_userPtr(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary, void* userPtr)
{
	// signature (little endian), assembled in unsigned arithmetic
	unsigned int signature = getByte (userPtr);
	signature |= (unsigned int)getByte (userPtr) << 8;
	signature |= (unsigned int)getByte (userPtr) << 16;
	signature |= (unsigned int)getByte (userPtr) << 24;
	unsigned char isModern = (signature == 0x184D2204);
	unsigned char isLegacy = (signature == 0x184C2102);
	if (!isModern && !isLegacy) {
		unlz4error ("invalid signature");
		return -1;
	}

	unsigned char hasBlockChecksum = FALSE;
	unsigned char hasContentSize = FALSE;
	unsigned char hasContentChecksum = FALSE;
	unsigned char hasDictionaryID = FALSE;
	if (isModern) {
		// flags
		unsigned char flags = getByte (userPtr);
		hasBlockChecksum = flags & 16;
		hasContentSize = flags & 8;
		hasContentChecksum = flags & 4;
		hasDictionaryID = flags & 1;

		// only version 1 file format
		unsigned char version = flags >> 6;
		if (version != 1) {
			unlz4error ("only LZ4 file format version 1 supported");
			return -1;
		}

		// ignore blocksize
		int numIgnore = 1;

		// ignore, skip 8 bytes
		if (hasContentSize) {
			numIgnore += 8;
		}
		// ignore, skip 4 bytes
		if (hasDictionaryID) {
			numIgnore += 4;
		}

		// ignore header checksum (xxhash32 of everything up this point & 0xFF)
		numIgnore++;

		// skip all those ignored bytes
		while (numIgnore--) {
			getByte (userPtr);
		}
	}

	// contains the latest decoded data, shared by all blocks of the stream
	struct Lz4Window win;
	win.pos = 0;
	win.filled = 0;

	// dictionary compression is a recently introduced feature, just move its contents to the buffer
	if (dictionary != NULL) {
		// open dictionary
		FILE *dict = fopen (dictionary, "rb");
		if (!dict) {
			unlz4error ("cannot open dictionary");
			return -1;
		}

		// get dictionary's filesize
		long dictSize = -1;
		if (fseek (dict, 0, SEEK_END) == 0) {
			dictSize = ftell (dict);
		}
		int loaded = FALSE;
		if (dictSize >= 0) {
			// only the last 64k are relevant
			long relevant = dictSize < HISTORY_SIZE? 0: dictSize - HISTORY_SIZE;
			if (dictSize > HISTORY_SIZE) {
				dictSize = HISTORY_SIZE;
			}
			// read it and store it at the end of the buffer
			loaded = fseek (dict, relevant, SEEK_SET) == 0
				&& fread (win.data + HISTORY_SIZE - dictSize, 1, (size_t)dictSize, dict) == (size_t)dictSize;
		}
		fclose (dict);
		if (!loaded) {
			unlz4error ("cannot read dictionary");
			return -1;
		}
		win.filled = (unsigned int)dictSize;
	}

	// parse all blocks until blockSize == 0
	while (1) {
		// block size
		unsigned int blockSize = getByte (userPtr);
		blockSize |= (unsigned int)getByte (userPtr) << 8;
		blockSize |= (unsigned int)getByte (userPtr) << 16;
		blockSize |= (unsigned int)getByte (userPtr) << 24;

		// highest bit set ?
		unsigned char isCompressed = isLegacy || (blockSize & 0x80000000) == 0;
		if (isModern) {
			blockSize &= 0x7FFFFFFF;
		}

		// stop after last block
		if (blockSize == 0) {
			break;
		}

		if (isCompressed) {
			// decompress block in place, so that later blocks can refer to its data
			unsigned int produced = 0;
			if (lz4DecodeBlock (getByte, sendBytes, userPtr, blockSize, &win, &produced) < 0) {
				return -1;
			}

			// all legacy blocks must be completely filled - except for the last one
			if (isLegacy && produced < 8 * 1024 * 1024) {
				break;
			}
		} else {
			// copy uncompressed data and add to history, too (if next block is compressed and some matches refer to this block)
			while (blockSize-- > 0) {
				lz4WindowPut (&win, getByte (userPtr), sendBytes, userPtr);
			}
		}

		if (hasBlockChecksum) {
			// ignore checksum, skip 4 bytes
			getByte (userPtr);
			getByte (userPtr);
			getByte (userPtr);
			getByte (userPtr);
		}
	}

	if (hasContentChecksum) {
		// ignore checksum, skip 4 bytes
		getByte (userPtr);
		getByte (userPtr);
		getByte (userPtr);
		getByte (userPtr);
	}

	// flush output buffer
	sendBytes (win.data, win.pos, userPtr);
	return 0;
}

/// decompress a single lz4 block of blockSize input bytes.
/// position/hist are optional (pass NULL for both): hist[0..*position) holds bytes that
/// were decoded earlier but not sent yet; they are sent in front of this block's output
/// and may be referenced by its matches. *position is 0 afterwards, hist is not modified.
/// Returns the number of bytes passed to sendBytes, or -1 if the block is corrupted
/// (in that case pending bytes are not sent).
int unlz4Block_userPtr (GET_BYTE getByte, SEND_BYTES sendBytes, void *userPtr, unsigned int blockSize, unsigned int *position, unsigned char *hist) {
	// contains the latest decoded data
	struct Lz4Window win;
	win.pos = 0;
	win.filled = 0;

	if (position != NULL && hist != NULL) {
		if (*position >= HISTORY_SIZE) {
			unlz4error ("invalid history position");
			return -1;
		}
		win.pos = *position;
		win.filled = win.pos;
		memcpy (win.data, hist, win.pos);
	}
	unsigned int pending = win.pos;

	// decompress block
	unsigned int produced = 0;
	if (lz4DecodeBlock (getByte, sendBytes, userPtr, blockSize, &win, &produced) < 0) {
		return -1;
	}

	// flush output buffer
	if (win.pos > 0) {
		sendBytes (win.data, win.pos, userPtr);
	}

	// everything was flushed, so nothing is left for the caller
	if (position != NULL && hist != NULL) {
		*position = 0;
	}
	return (int)(pending + produced);
}

/// old interface where getByte and sendBytes use global file handles
void unlz4(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary)
{
	unlz4_userPtr (getByte, sendBytes, dictionary, NULL);
}

/// decompress the LZ4 stream read from "in" to "out" (dictionary: optional file name, may be NULL).
/// Returns 0 on success, -1 if the stream is corrupted/truncated or the output could not be written.
int unlz4File(FILE *in, FILE *out, const char* dictionary)
{
	struct UserPtr user = {
		.in = in,
		.out = out,
		.pos = 0, // initial input buffer is empty
		.available = 0,
		.failed = FALSE
	};
	int result = unlz4_userPtr (getByteFromIn, sendBytesToOut, dictionary, &user);
	return (result == 0 && !user.failed)? 0: -1;
}


// ==================== COMMAND-LINE HANDLING ====================


#ifdef SMALLZ4CAT_MAIN
/// parse command-line: smallz4cat [-D dictionary] [file]
/// only built when SMALLZ4CAT_MAIN is defined, a library object must not export main()
int main(int argc, const char* argv[])
{
	// default input stream
	FILE *in = stdin;
	const char *dictionary = NULL;

	// first command-line parameter is our input filename / but ignore "-" which stands for STDIN
	int parameter;
	for (parameter = 1; parameter < argc; parameter++) {
		const char *current = argv[parameter];
		// dictionary
		if (current[0] == '-' && current[1] == 'D') {
			if (parameter + 1 >= argc) {
				unlz4error ("no dictionary filename found");
				return 1;
			}
			dictionary = argv[++parameter];
			continue;
		}

		// read from STDIN, default behavior (other parameters starting with '-' are ignored as well)
		if (current[0] == '-' || current[0] == '\0') {
			continue;
		}

		// filename
		// already have a filename - at most one filename is allowed (except for dictionary)
		if (in != stdin) {
			unlz4error ("can only decompress one file at a time");
			fclose (in);
			return 1;
		}
		// get handle
		in = fopen (current, "rb");
		if (!in) {
			unlz4error ("file not found");
			return 1;
		}
	}

	// and go !
	int result = unlz4File (in, stdout, dictionary);
	if (in != stdin) {
		fclose (in);
	}
	return result == 0? 0: 1;
}
#endif