Bug 1883629 - Update Snappy to version 1.2.0. r=dom-storage-reviewers,janv

Differential Revision: https://phabricator.services.mozilla.com/D203634
Ryan VanderMeulen 2024-04-08 13:48:06 +00:00
parent e790dbc058
commit e082f2a05b
17 changed files with 975 additions and 278 deletions

View File

@ -34,7 +34,7 @@
namespace mozilla::dom::cache {
-static_assert(SNAPPY_VERSION == 0x010109);
+static_assert(SNAPPY_VERSION == 0x010200);
using mozilla::dom::quota::Client;
using mozilla::dom::quota::CloneFileAndAppend;

View File

@ -66,7 +66,7 @@ class nsIFile;
namespace mozilla::dom::indexedDB {
-static_assert(SNAPPY_VERSION == 0x010109);
+static_assert(SNAPPY_VERSION == 0x010200);
using mozilla::ipc::IsOnBackgroundThread;

View File

@ -16,7 +16,7 @@
namespace mozilla::dom {
-static_assert(SNAPPY_VERSION == 0x010109);
+static_assert(SNAPPY_VERSION == 0x010200);
bool SnappyCompress(const nsACString& aSource, nsACString& aDest) {
MOZ_ASSERT(!aSource.IsVoid());

View File

@ -0,0 +1,22 @@
diff --git a/other-licenses/snappy/src/snappy.h b/other-licenses/snappy/src/snappy.h
--- a/other-licenses/snappy/src/snappy.h
+++ b/other-licenses/snappy/src/snappy.h
@@ -60,17 +60,17 @@ namespace snappy {
// 9 in the future.
// If you played with other compression algorithms, level 1 is equivalent to
// fast mode (level 1) of LZ4, level 2 is equivalent to LZ4's level 2 mode
// and compresses somewhere around zstd:-3 and zstd:-2 but generally with
// faster decompression speeds than snappy:1 and zstd:-3.
int level = DefaultCompressionLevel();
constexpr CompressionOptions() = default;
- constexpr CompressionOptions(int compression_level)
+ constexpr explicit CompressionOptions(int compression_level)
: level(compression_level) {}
static constexpr int MinCompressionLevel() { return 1; }
static constexpr int MaxCompressionLevel() { return 2; }
static constexpr int DefaultCompressionLevel() { return 1; }
};
// ------------------------------------------------------------------------
// Generic compression/decompression routines.
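A quick sketch (not part of the patch) of what making the constructor explicit changes for callers; only the implicit int-to-options conversion is ruled out:

```c++
#include <snappy.h>

void Sketch() {
  snappy::CompressionOptions a;               // default level 1
  snappy::CompressionOptions b(2);            // fine: explicit construction
  snappy::CompressionOptions c{/*level=*/2};  // fine: direct-list-initialization
  // snappy::CompressionOptions d = 2;        // no longer compiles: ctor is explicit
  (void)a; (void)b; (void)c;
}
```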

View File

@ -1,17 +1,20 @@
See src/README for the README that ships with snappy.
Mozilla does not modify the actual snappy source with the exception of the
-'snappy-stubs-public.h' header. We have replaced its build system with our own.
+'snappy-stubs-public.h' header and one small patch to resolve implicit
+constructor warnings. We have replaced its build system with our own.
Snappy comes from:
https://github.com/google/snappy
-We are currently using revision: 1.1.9
+We are currently using revision: 1.2.0
To upgrade to a newer version:
-1. Check out the new code using subversion.
+1. Check out the new code using git.
2. Update 'snappy-stubs-public.h' in this directory with any changes that were
-made to 'snappy-stubs-public.h.in' in the new source.
+made to 'snappy-stubs-public.h.in' in the new source. Note that we don't
+bother trying to detect the availability of sys/uio.h and unconditionally
+define the iovec type instead for all platforms.
3. Copy the major/minor/patch versions from 'CMakeLists.txt' into
'snappy-stubs-public.h'.
4. Copy all source files from the new version into the src subdirectory. The
@ -23,4 +26,5 @@ To upgrade to a newer version:
- 'testdata' subdirectory
- 'third_party' subdirectory
5. Update the revision stamp in this file.
6. Apply 01-explicit.patch.

View File

@ -1,5 +1,4 @@
// Copyright 2011 Google Inc. All Rights Reserved.
-// Author: sesse@google.com (Steinar H. Gunderson)
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
@ -39,8 +38,8 @@
#include <cstddef>
#define SNAPPY_MAJOR 1
-#define SNAPPY_MINOR 1
-#define SNAPPY_PATCHLEVEL 9
+#define SNAPPY_MINOR 2
+#define SNAPPY_PATCHLEVEL 0
#define SNAPPY_VERSION \
((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
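The static_asserts updated in the mozilla::dom::cache, mozilla::dom::indexedDB, and SnappyCompress changes earlier in this commit check against this packed value; a small sanity sketch of the encoding (illustration only, not part of the diff):

```c++
#include <snappy.h>  // pulls in the SNAPPY_MAJOR/MINOR/PATCHLEVEL macros

// 1.2.0 packs to (1 << 16) | (2 << 8) | 0, i.e. 0x010200, which is the value
// the Gecko static_asserts were bumped to in this commit.
static_assert(SNAPPY_VERSION == ((1 << 16) | (2 << 8) | 0), "expected 1.2.0");
static_assert(SNAPPY_VERSION == 0x010200, "expected 0x010200");
```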

View File

@ -3,30 +3,10 @@
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
-## Project Goals
-In addition to the aims listed at the top of the [README](README.md) Snappy
-explicitly supports the following:
-1. C++11
-2. Clang (gcc and MSVC are best-effort).
-3. Low level optimizations (e.g. assembly or equivalent intrinsics) for:
-1. [x86](https://en.wikipedia.org/wiki/X86)
-2. [x86-64](https://en.wikipedia.org/wiki/X86-64)
-3. ARMv7 (32-bit)
-4. ARMv8 (AArch64)
-4. Supports only the Snappy compression scheme as described in
-[format_description.txt](format_description.txt).
-5. CMake for building
-Changes adding features or dependencies outside of the core area of focus listed
-above might not be accepted. If in doubt post a message to the
-[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression).
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
-Agreement. You (or your employer) retain the copyright to your contribution,
+Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
@ -35,12 +15,17 @@ You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
-## Code reviews
+## Code Reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
-Please make sure that all the automated checks (CLA, AppVeyor, Travis) pass for
-your pull requests. Pull requests whose checks fail may be ignored.
+See [the README](README.md#contributing-to-the-snappy-project) for areas
+where we are likely to accept external contributions.
## Community Guidelines
This project follows [Google's Open Source Community
Guidelines](https://opensource.google/conduct/).

View File

@ -1,3 +1,9 @@
Snappy v1.1.10, Mar 8th 2023:
* Performance improvements
* Compilation fixes for various environments
Snappy v1.1.9, May 4th 2021:
* Performance improvements.

View File

@ -1,7 +1,6 @@
Snappy, a fast compressor/decompressor.
-[![Build Status](https://travis-ci.org/google/snappy.svg?branch=master)](https://travis-ci.org/google/snappy)
-[![Build status](https://ci.appveyor.com/api/projects/status/t9nubcqkwo8rw8yn/branch/master?svg=true)](https://ci.appveyor.com/project/pwnall/leveldb)
+[![Build Status](https://github.com/google/snappy/actions/workflows/build.yml/badge.svg)](https://github.com/google/snappy/actions/workflows/build.yml)
Introduction
============
@ -90,13 +89,13 @@ your calling file, and link against the compiled library.
There are many ways to call Snappy, but the simplest possible is
-```cpp
+```c++
snappy::Compress(input.data(), input.size(), &output);
```
and similarly
-```cpp
+```c++
snappy::Uncompress(input.data(), input.size(), &output);
```
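For context, a complete round trip built from those two calls might look like this (a minimal sketch; `input` here is just local sample data, not something defined by the README):

```c++
#include <cassert>
#include <string>
#include <snappy.h>

int main() {
  const std::string input = "hello hello hello hello hello";  // sample data
  std::string compressed, output;
  snappy::Compress(input.data(), input.size(), &compressed);
  bool ok = snappy::Uncompress(compressed.data(), compressed.size(), &output);
  assert(ok && output == input);  // decompression restores the original bytes
  return ok ? 0 : 1;
}
```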
@ -132,6 +131,32 @@ should provide a reasonably balanced starting point for benchmarking. (Note that
baddata[1-3].snappy are not intended as benchmarks; they are used to verify
correctness in the presence of corrupted data in the unit test.)
Contributing to the Snappy Project
==================================
In addition to the aims listed at the top of the [README](README.md) Snappy
explicitly supports the following:
1. C++11
2. Clang (gcc and MSVC are best-effort).
3. Low level optimizations (e.g. assembly or equivalent intrinsics) for:
1. [x86](https://en.wikipedia.org/wiki/X86)
2. [x86-64](https://en.wikipedia.org/wiki/X86-64)
3. ARMv7 (32-bit)
4. ARMv8 (AArch64)
4. Supports only the Snappy compression scheme as described in
[format_description.txt](format_description.txt).
5. CMake for building
Changes adding features or dependencies outside of the core area of focus listed
above might not be accepted. If in doubt post a message to the
[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression).
We are unlikely to accept contributions to the build configuration files, such
as `CMakeLists.txt`. We are focused on maintaining a build configuration that
allows us to test that the project works in a few supported configurations
inside Google. We are not currently interested in supporting other requirements,
such as different operating systems, compilers, or build systems.
Contact
=======

View File

@ -31,11 +31,88 @@
#ifndef THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
#define THIRD_PARTY_SNAPPY_SNAPPY_INTERNAL_H_
#include <utility>
#include "snappy-stubs-internal.h"
#if SNAPPY_HAVE_SSSE3
// Please do not replace with <x86intrin.h> or with headers that assume more
// advanced SSE versions without checking with all the OWNERS.
#include <emmintrin.h>
#include <tmmintrin.h>
#endif
#if SNAPPY_HAVE_NEON
#include <arm_neon.h>
#endif
#if SNAPPY_HAVE_SSSE3 || SNAPPY_HAVE_NEON
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 1
#else
#define SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE 0
#endif
namespace snappy {
namespace internal {
#if SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
#if SNAPPY_HAVE_SSSE3
using V128 = __m128i;
#elif SNAPPY_HAVE_NEON
using V128 = uint8x16_t;
#endif
// Load 128 bits of integer data. `src` must be 16-byte aligned.
inline V128 V128_Load(const V128* src);
// Load 128 bits of integer data. `src` does not need to be aligned.
inline V128 V128_LoadU(const V128* src);
// Store 128 bits of integer data. `dst` does not need to be aligned.
inline void V128_StoreU(V128* dst, V128 val);
// Shuffle packed 8-bit integers using a shuffle mask.
// Each packed integer in the shuffle mask must be in [0,16).
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask);
// Constructs V128 with 16 chars |c|.
inline V128 V128_DupChar(char c);
#if SNAPPY_HAVE_SSSE3
inline V128 V128_Load(const V128* src) { return _mm_load_si128(src); }
inline V128 V128_LoadU(const V128* src) { return _mm_loadu_si128(src); }
inline void V128_StoreU(V128* dst, V128 val) { _mm_storeu_si128(dst, val); }
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
return _mm_shuffle_epi8(input, shuffle_mask);
}
inline V128 V128_DupChar(char c) { return _mm_set1_epi8(c); }
#elif SNAPPY_HAVE_NEON
inline V128 V128_Load(const V128* src) {
return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
}
inline V128 V128_LoadU(const V128* src) {
return vld1q_u8(reinterpret_cast<const uint8_t*>(src));
}
inline void V128_StoreU(V128* dst, V128 val) {
vst1q_u8(reinterpret_cast<uint8_t*>(dst), val);
}
inline V128 V128_Shuffle(V128 input, V128 shuffle_mask) {
assert(vminvq_u8(shuffle_mask) >= 0 && vmaxvq_u8(shuffle_mask) <= 15);
return vqtbl1q_u8(input, shuffle_mask);
}
inline V128 V128_DupChar(char c) { return vdupq_n_u8(c); }
#endif
#endif // SNAPPY_HAVE_VECTOR_BYTE_SHUFFLE
// Working memory performs a single allocation to hold all scratch space
// required for compression.
class WorkingMemory {
@ -95,8 +172,9 @@ char* CompressFragment(const char* input,
// loading from s2 + n.
//
// Separate implementation for 64-bit, little-endian cpus.
-#if !defined(SNAPPY_IS_BIG_ENDIAN) && \
-(defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || defined(ARCH_ARM))
+#if !SNAPPY_IS_BIG_ENDIAN && \
+(defined(__x86_64__) || defined(_M_X64) || defined(ARCH_PPC) || \
+defined(ARCH_ARM))
static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
const char* s2,
const char* s2_limit,
@ -154,8 +232,9 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
uint64_t xorval = a1 ^ a2;
int shift = Bits::FindLSBSetNonZero64(xorval);
size_t matched_bytes = shift >> 3;
uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
#ifndef __x86_64__
-*data = UNALIGNED_LOAD64(s2 + matched_bytes);
+a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
#else
// Ideally this would just be
//
@ -166,19 +245,21 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
// use a conditional move (it's tuned to cut data dependencies). In this
// case there is a longer parallel chain anyway AND this will be fairly
// unpredictable.
-uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
asm("testl %k2, %k2\n\t"
"cmovzq %1, %0\n\t"
: "+r"(a2)
-: "r"(a3), "r"(xorval));
-*data = a2 >> (shift & (3 * 8));
+: "r"(a3), "r"(xorval)
+: "cc");
#endif
*data = a2 >> (shift & (3 * 8));
return std::pair<size_t, bool>(matched_bytes, true);
} else {
matched = 8;
s2 += 8;
}
}
SNAPPY_PREFETCH(s1 + 64);
SNAPPY_PREFETCH(s2 + 64);
// Find out how long the match is. We loop over the data 64 bits at a
// time until we find a 64-bit block that doesn't match; then we find
@ -194,16 +275,17 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
uint64_t xorval = a1 ^ a2;
int shift = Bits::FindLSBSetNonZero64(xorval);
size_t matched_bytes = shift >> 3;
-#ifndef __x86_64__
-*data = UNALIGNED_LOAD64(s2 + matched_bytes);
-#else
uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
#ifndef __x86_64__
a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
#else
asm("testl %k2, %k2\n\t"
"cmovzq %1, %0\n\t"
: "+r"(a2)
-: "r"(a3), "r"(xorval));
-*data = a2 >> (shift & (3 * 8));
+: "r"(a3), "r"(xorval)
+: "cc");
#endif
*data = a2 >> (shift & (3 * 8));
matched += matched_bytes;
assert(matched >= 8);
return std::pair<size_t, bool>(matched, false);
@ -252,6 +334,31 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
}
#endif
static inline size_t FindMatchLengthPlain(const char* s1, const char* s2,
const char* s2_limit) {
// Implementation based on the x86-64 version, above.
assert(s2_limit >= s2);
int matched = 0;
while (s2 <= s2_limit - 8 &&
UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched)) {
s2 += 8;
matched += 8;
}
if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 8) {
uint64_t x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
int matching_bits = Bits::FindLSBSetNonZero64(x);
matched += matching_bits >> 3;
s2 += matching_bits >> 3;
} else {
while ((s2 < s2_limit) && (s1[matched] == *s2)) {
++s2;
++matched;
}
}
return matched;
}
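Both the specialized and plain variants above rely on the same trick: XOR two 8-byte blocks and locate the lowest set bit of the result to find the first mismatching byte. A standalone sketch of that step, using the GCC/Clang builtin that HAVE_BUILTIN_CTZ maps onto (illustration only, not code from the patch):

```c++
#include <cstdint>
#include <cstring>

// On a little-endian CPU, the index of the lowest set bit of (a XOR b),
// divided by 8, is the number of leading bytes on which the blocks agree.
// This mirrors the `matched_bytes = shift >> 3` computation above.
inline int MatchedBytes(const char* a, const char* b) {
  std::uint64_t x, y;
  std::memcpy(&x, a, 8);
  std::memcpy(&y, b, 8);
  std::uint64_t diff = x ^ y;
  if (diff == 0) return 8;             // all eight bytes match
  return __builtin_ctzll(diff) >> 3;   // first differing bit -> byte index
}
```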
// Lookup tables for decompression code. Give --snappy_dump_decompression_table
// to the unit test to recompute char_table.

View File

@ -31,7 +31,7 @@
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
-#ifdef HAVE_CONFIG_H
+#if HAVE_CONFIG_H
#include "config.h"
#endif
@ -43,11 +43,11 @@
#include <limits>
#include <string>
-#ifdef HAVE_SYS_MMAN_H
+#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
-#ifdef HAVE_UNISTD_H
+#if HAVE_UNISTD_H
#include <unistd.h>
#endif
@ -90,19 +90,25 @@
#define ARRAYSIZE(a) int{sizeof(a) / sizeof(*(a))}
// Static prediction hints.
-#ifdef HAVE_BUILTIN_EXPECT
+#if HAVE_BUILTIN_EXPECT
#define SNAPPY_PREDICT_FALSE(x) (__builtin_expect(x, 0))
#define SNAPPY_PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
#else
#define SNAPPY_PREDICT_FALSE(x) x
#define SNAPPY_PREDICT_TRUE(x) x
-#endif
+#endif // HAVE_BUILTIN_EXPECT
// Inlining hints.
-#ifdef HAVE_ATTRIBUTE_ALWAYS_INLINE
+#if HAVE_ATTRIBUTE_ALWAYS_INLINE
#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE __attribute__((always_inline))
#else
#define SNAPPY_ATTRIBUTE_ALWAYS_INLINE
#endif // HAVE_ATTRIBUTE_ALWAYS_INLINE
#if HAVE_BUILTIN_PREFETCH
#define SNAPPY_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 3)
#else
#define SNAPPY_PREFETCH(ptr) (void)(ptr)
#endif
// Stubbed version of ABSL_FLAG.
@ -171,27 +177,42 @@ class LittleEndian {
public:
// Functions to do unaligned loads and stores in little-endian order.
static inline uint16_t Load16(const void *ptr) {
-const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
// Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
return (static_cast<uint16_t>(buffer[0])) |
(static_cast<uint16_t>(buffer[1]) << 8);
#else
// memcpy() turns into a single instruction early in the optimization
// pipeline (relatively to a series of byte accesses). So, using memcpy
// instead of byte accesses may lead to better decisions in more stages of
// the optimization pipeline.
uint16_t value;
std::memcpy(&value, ptr, 2);
return value;
#endif
}
static inline uint32_t Load32(const void *ptr) {
-const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
// Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
return (static_cast<uint32_t>(buffer[0])) |
(static_cast<uint32_t>(buffer[1]) << 8) |
(static_cast<uint32_t>(buffer[2]) << 16) |
(static_cast<uint32_t>(buffer[3]) << 24);
#else
// See Load16() for the rationale of using memcpy().
uint32_t value;
std::memcpy(&value, ptr, 4);
return value;
#endif
}
static inline uint64_t Load64(const void *ptr) {
-const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
// Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
return (static_cast<uint64_t>(buffer[0])) |
(static_cast<uint64_t>(buffer[1]) << 8) |
(static_cast<uint64_t>(buffer[2]) << 16) |
@ -200,30 +221,44 @@ class LittleEndian {
(static_cast<uint64_t>(buffer[5]) << 40) |
(static_cast<uint64_t>(buffer[6]) << 48) |
(static_cast<uint64_t>(buffer[7]) << 56);
#else
// See Load16() for the rationale of using memcpy().
uint64_t value;
std::memcpy(&value, ptr, 8);
return value;
#endif
}
static inline void Store16(void *dst, uint16_t value) {
-uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
// Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
buffer[0] = static_cast<uint8_t>(value);
buffer[1] = static_cast<uint8_t>(value >> 8);
#else
// See Load16() for the rationale of using memcpy().
std::memcpy(dst, &value, 2);
#endif
}
static void Store32(void *dst, uint32_t value) {
-uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
// Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
buffer[0] = static_cast<uint8_t>(value);
buffer[1] = static_cast<uint8_t>(value >> 8);
buffer[2] = static_cast<uint8_t>(value >> 16);
buffer[3] = static_cast<uint8_t>(value >> 24);
#else
// See Load16() for the rationale of using memcpy().
std::memcpy(dst, &value, 4);
#endif
}
static void Store64(void* dst, uint64_t value) {
-uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
// Compiles to a single mov/str on recent clang and gcc.
#if SNAPPY_IS_BIG_ENDIAN
uint8_t* const buffer = reinterpret_cast<uint8_t*>(dst);
buffer[0] = static_cast<uint8_t>(value);
buffer[1] = static_cast<uint8_t>(value >> 8);
buffer[2] = static_cast<uint8_t>(value >> 16);
@ -232,14 +267,18 @@ class LittleEndian {
buffer[5] = static_cast<uint8_t>(value >> 40);
buffer[6] = static_cast<uint8_t>(value >> 48);
buffer[7] = static_cast<uint8_t>(value >> 56);
#else
// See Load16() for the rationale of using memcpy().
std::memcpy(dst, &value, 8);
#endif
}
static inline constexpr bool IsLittleEndian() {
-#if defined(SNAPPY_IS_BIG_ENDIAN)
+#if SNAPPY_IS_BIG_ENDIAN
return false;
#else
return true;
-#endif // defined(SNAPPY_IS_BIG_ENDIAN)
+#endif // SNAPPY_IS_BIG_ENDIAN
}
};
@ -265,7 +304,7 @@ class Bits {
void operator=(const Bits&);
};
-#if defined(HAVE_BUILTIN_CTZ)
+#if HAVE_BUILTIN_CTZ
inline int Bits::Log2FloorNonZero(uint32_t n) {
assert(n != 0);
@ -354,7 +393,7 @@ inline int Bits::FindLSBSetNonZero(uint32_t n) {
#endif // End portable versions.
-#if defined(HAVE_BUILTIN_CTZ)
+#if HAVE_BUILTIN_CTZ
inline int Bits::FindLSBSetNonZero64(uint64_t n) {
assert(n != 0);
@ -388,7 +427,7 @@ inline int Bits::FindLSBSetNonZero64(uint64_t n) {
}
}
-#endif // End portable version.
+#endif // HAVE_BUILTIN_CTZ
// Variable-length integer encoding.
class Varint {

View File

@ -151,7 +151,7 @@ LogMessageCrash::~LogMessageCrash() {
#pragma warning(pop)
#endif
-#ifdef HAVE_LIBZ
+#if HAVE_LIBZ
ZLib::ZLib()
: comp_init_(false),

View File

@ -31,25 +31,25 @@
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_TEST_H_
-#ifdef HAVE_CONFIG_H
+#if HAVE_CONFIG_H
#include "config.h"
#endif
#include "snappy-stubs-internal.h"
-#ifdef HAVE_SYS_MMAN_H
+#if HAVE_SYS_MMAN_H
#include <sys/mman.h>
#endif
-#ifdef HAVE_SYS_RESOURCE_H
+#if HAVE_SYS_RESOURCE_H
#include <sys/resource.h>
#endif
-#ifdef HAVE_SYS_TIME_H
+#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
-#ifdef HAVE_WINDOWS_H
+#if HAVE_WINDOWS_H
// Needed to be able to use std::max without workarounds in the source code.
// https://support.microsoft.com/en-us/help/143208/prb-using-stl-in-windows-program-can-cause-min-max-conflicts
#define NOMINMAX
@ -58,15 +58,15 @@
#define InitGoogle(argv0, argc, argv, remove_flags) ((void)(0))
-#ifdef HAVE_LIBZ
+#if HAVE_LIBZ
#include "zlib.h"
#endif
-#ifdef HAVE_LIBLZO2
+#if HAVE_LIBLZO2
#include "lzo/lzo1x.h"
#endif
-#ifdef HAVE_LIBLZ4
+#if HAVE_LIBLZ4
#include "lz4.h"
#endif
@ -216,7 +216,7 @@ class LogMessageVoidify {
#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
#define CHECK_OK(cond) (cond).ok()
-#ifdef HAVE_LIBZ
+#if HAVE_LIBZ
// Object-oriented wrapper around zlib.
class ZLib {

File diff suppressed because it is too large.

View File

@ -50,13 +50,36 @@ namespace snappy {
class Source;
class Sink;
struct CompressionOptions {
// Compression level.
// Level 1 is the fastest
// Level 2 is a little slower but provides better compression. Level 2 is
// **EXPERIMENTAL** for the time being. It might happen that we decide to
// fall back to level 1 in the future.
// Levels 3+ are currently not supported. We plan to support levels up to
// 9 in the future.
// If you played with other compression algorithms, level 1 is equivalent to
// fast mode (level 1) of LZ4, level 2 is equivalent to LZ4's level 2 mode
// and compresses somewhere around zstd:-3 and zstd:-2 but generally with
// faster decompression speeds than snappy:1 and zstd:-3.
int level = DefaultCompressionLevel();
constexpr CompressionOptions() = default;
constexpr explicit CompressionOptions(int compression_level)
: level(compression_level) {}
static constexpr int MinCompressionLevel() { return 1; }
static constexpr int MaxCompressionLevel() { return 2; }
static constexpr int DefaultCompressionLevel() { return 1; }
};
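A minimal sketch of how a caller could opt into the experimental level 2 once this header lands (hypothetical wrapper, mirroring the fuzzer change later in this commit):

```c++
#include <string>
#include <snappy.h>

// Compress one buffer at level 2; level 1 remains the default everywhere else.
std::string CompressLevel2(const std::string& input) {
  std::string out;
  snappy::Compress(input.data(), input.size(), &out,
                   snappy::CompressionOptions{/*level=*/2});
  return out;
}
```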
// ------------------------------------------------------------------------
// Generic compression/decompression routines.
// ------------------------------------------------------------------------
-// Compress the bytes read from "*source" and append to "*sink". Return the
+// Compress the bytes read from "*reader" and append to "*writer". Return the
// number of bytes written.
-size_t Compress(Source* source, Sink* sink);
+size_t Compress(Source* reader, Sink* writer,
+CompressionOptions options = {});
// Find the uncompressed length of the given stream, as given by the header.
// Note that the true length could deviate from this; the stream could e.g.
@ -71,14 +94,22 @@ namespace snappy {
// Higher-level string based routines (should be sufficient for most users)
// ------------------------------------------------------------------------
-// Sets "*compressed" to the compressed version of "input[0,input_length-1]".
+// Sets "*compressed" to the compressed version of "input[0..input_length-1]".
// Original contents of *compressed are lost.
//
// REQUIRES: "input[]" is not an alias of "*compressed".
size_t Compress(const char* input, size_t input_length,
-std::string* compressed);
+std::string* compressed, CompressionOptions options = {});
// Same as `Compress` above but taking an `iovec` array as input. Note that
// this function preprocesses the inputs to compute the sum of
// `iov[0..iov_cnt-1].iov_len` before reading. To avoid this, use
// `RawCompressFromIOVec` below.
size_t CompressFromIOVec(const struct iovec* iov, size_t iov_cnt,
std::string* compressed,
CompressionOptions options = {});
-// Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
+// Decompresses "compressed[0..compressed_length-1]" to "*uncompressed".
// Original contents of "*uncompressed" are lost.
//
// REQUIRES: "compressed[]" is not an alias of "*uncompressed".
@ -119,10 +150,15 @@ namespace snappy {
// RawCompress(input, input_length, output, &output_length);
// ... Process(output, output_length) ...
// delete [] output;
-void RawCompress(const char* input,
-size_t input_length,
-char* compressed,
-size_t* compressed_length);
+void RawCompress(const char* input, size_t input_length, char* compressed,
+size_t* compressed_length, CompressionOptions options = {});
// Same as `RawCompress` above but taking an `iovec` array as input. Note that
// `uncompressed_length` is the total number of bytes to be read from the
// elements of `iov` (_not_ the number of elements in `iov`).
void RawCompressFromIOVec(const struct iovec* iov, size_t uncompressed_length,
char* compressed, size_t* compressed_length,
CompressionOptions options = {});
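A short usage sketch for the new `CompressFromIOVec` entry point, assuming a POSIX build where `struct iovec` comes from `<sys/uio.h>` (on other platforms the vendored stubs header defines it):

```c++
#include <string>
#include <sys/uio.h>  // struct iovec on POSIX
#include <snappy.h>

int main() {
  std::string part1 = "hello ", part2 = "world";
  struct iovec iov[2];
  iov[0].iov_base = &part1[0];  // writable pointer into the string's buffer
  iov[0].iov_len = part1.size();
  iov[1].iov_base = &part2[0];
  iov[1].iov_len = part2.size();

  std::string compressed;
  // Compresses the concatenation "hello world" without materializing it first.
  snappy::CompressFromIOVec(iov, 2, &compressed);

  std::string out;
  snappy::Uncompress(compressed.data(), compressed.size(), &out);
  return out == "hello world" ? 0 : 1;
}
```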
// Given data in "compressed[0..compressed_length-1]" generated by
// calling the Snappy::Compress routine, this routine
@ -202,7 +238,7 @@ namespace snappy {
static constexpr int kMinHashTableBits = 8;
static constexpr size_t kMinHashTableSize = 1 << kMinHashTableBits;
-static constexpr int kMaxHashTableBits = 14;
+static constexpr int kMaxHashTableBits = 15;
static constexpr size_t kMaxHashTableSize = 1 << kMaxHashTableBits;
} // end namespace snappy

View File

@ -39,22 +39,26 @@
// Entry point for LibFuzzer.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
std::string input(reinterpret_cast<const char*>(data), size);
-std::string compressed;
-size_t compressed_size =
-snappy::Compress(input.data(), input.size(), &compressed);
-(void)compressed_size; // Variable only used in debug builds.
-assert(compressed_size == compressed.size());
-assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
-assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
-std::string uncompressed_after_compress;
-bool uncompress_succeeded = snappy::Uncompress(
-compressed.data(), compressed.size(), &uncompressed_after_compress);
-(void)uncompress_succeeded; // Variable only used in debug builds.
-assert(uncompress_succeeded);
-assert(input == uncompressed_after_compress);
+for (int level = snappy::CompressionOptions::MinCompressionLevel();
+level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) {
+std::string compressed;
+size_t compressed_size =
+snappy::Compress(input.data(), input.size(), &compressed,
+snappy::CompressionOptions{/*level=*/level});
+(void)compressed_size; // Variable only used in debug builds.
+assert(compressed_size == compressed.size());
+assert(compressed.size() <= snappy::MaxCompressedLength(input.size()));
+assert(
+snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
+std::string uncompressed_after_compress;
+bool uncompress_succeeded = snappy::Uncompress(
+compressed.data(), compressed.size(), &uncompressed_after_compress);
+(void)uncompress_succeeded; // Variable only used in debug builds.
+assert(uncompress_succeeded);
+assert(input == uncompressed_after_compress);
+}
return 0;
}

View File

@ -50,7 +50,7 @@ namespace snappy {
namespace {
-#if defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
+#if HAVE_FUNC_MMAP && HAVE_FUNC_SYSCONF
// To test against code that reads beyond its input, this class copies a
// string to a newly allocated group of pages, the last of which
@ -96,7 +96,7 @@ class DataEndingAtUnreadablePage {
size_t size_;
};
-#else // defined(HAVE_FUNC_MMAP) && defined(HAVE_FUNC_SYSCONF)
+#else // HAVE_FUNC_MMAP) && HAVE_FUNC_SYSCONF
// Fallback for systems without mmap.
using DataEndingAtUnreadablePage = std::string;
@ -137,21 +137,10 @@ void VerifyStringSink(const std::string& input) {
CHECK_EQ(uncompressed, input);
}
-void VerifyIOVec(const std::string& input) {
-std::string compressed;
-DataEndingAtUnreadablePage i(input);
-const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
-CHECK_EQ(written, compressed.size());
-CHECK_LE(compressed.size(),
-snappy::MaxCompressedLength(input.size()));
-CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
-// Try uncompressing into an iovec containing a random number of entries
-// ranging from 1 to 10.
-char* buf = new char[input.size()];
+struct iovec* GetIOVec(const std::string& input, char*& buf, size_t& num) {
std::minstd_rand0 rng(input.size());
std::uniform_int_distribution<size_t> uniform_1_to_10(1, 10);
-size_t num = uniform_1_to_10(rng);
+num = uniform_1_to_10(rng);
if (input.size() < num) {
num = input.size();
}
@ -175,8 +164,40 @@ void VerifyIOVec(const std::string& input) {
}
used_so_far += iov[i].iov_len;
}
-CHECK(snappy::RawUncompressToIOVec(
-compressed.data(), compressed.size(), iov, num));
+return iov;
+}
int VerifyIOVecSource(const std::string& input) {
std::string compressed;
std::string copy = input;
char* buf = const_cast<char*>(copy.data());
size_t num = 0;
struct iovec* iov = GetIOVec(input, buf, num);
const size_t written = snappy::CompressFromIOVec(iov, num, &compressed);
CHECK_EQ(written, compressed.size());
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
std::string uncompressed;
DataEndingAtUnreadablePage c(compressed);
CHECK(snappy::Uncompress(c.data(), c.size(), &uncompressed));
CHECK_EQ(uncompressed, input);
delete[] iov;
return uncompressed.size();
}
void VerifyIOVecSink(const std::string& input) {
std::string compressed;
DataEndingAtUnreadablePage i(input);
const size_t written = snappy::Compress(i.data(), i.size(), &compressed);
CHECK_EQ(written, compressed.size());
CHECK_LE(compressed.size(), snappy::MaxCompressedLength(input.size()));
CHECK(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
char* buf = new char[input.size()];
size_t num = 0;
struct iovec* iov = GetIOVec(input, buf, num);
CHECK(snappy::RawUncompressToIOVec(compressed.data(), compressed.size(), iov,
num));
CHECK(!memcmp(buf, input.data(), input.size()));
delete[] iov;
delete[] buf;
@ -252,15 +273,18 @@ int Verify(const std::string& input) {
// Compress using string based routines
const int result = VerifyString(input);
// Compress using `iovec`-based routines.
CHECK_EQ(VerifyIOVecSource(input), result);
// Verify using sink based routines
VerifyStringSink(input);
VerifyNonBlockedCompression(input);
-VerifyIOVec(input);
+VerifyIOVecSink(input);
if (!input.empty()) {
const std::string expanded = Expand(input);
VerifyNonBlockedCompression(expanded);
-VerifyIOVec(input);
+VerifyIOVecSink(input);
}
return result;
@ -540,7 +564,27 @@ TEST(Snappy, FourByteOffset) {
CHECK_EQ(uncompressed, src);
}
-TEST(Snappy, IOVecEdgeCases) {
+TEST(Snappy, IOVecSourceEdgeCases) {
// Validate that empty leading, trailing, and in-between iovecs are handled:
// [] [] ['a'] [] ['b'] [].
std::string data = "ab";
char* buf = const_cast<char*>(data.data());
size_t used_so_far = 0;
static const int kLengths[] = {0, 0, 1, 0, 1, 0};
struct iovec iov[ARRAYSIZE(kLengths)];
for (int i = 0; i < ARRAYSIZE(kLengths); ++i) {
iov[i].iov_base = buf + used_so_far;
iov[i].iov_len = kLengths[i];
used_so_far += kLengths[i];
}
std::string compressed;
snappy::CompressFromIOVec(iov, ARRAYSIZE(kLengths), &compressed);
std::string uncompressed;
snappy::Uncompress(compressed.data(), compressed.size(), &uncompressed);
CHECK_EQ(data, uncompressed);
}
TEST(Snappy, IOVecSinkEdgeCases) {
// Test some tricky edge cases in the iovec output that are not necessarily
// exercised by random tests.
@ -905,7 +949,7 @@ TEST(Snappy, VerifyCharTable) {
// COPY_1_BYTE_OFFSET.
//
// The tag byte in the compressed data stores len-4 in 3 bits, and
-// offset/256 in 5 bits. offset%256 is stored in the next byte.
+// offset/256 in 3 bits. offset%256 is stored in the next byte.
//
// This format is used for length in range [4..11] and offset in
// range [0..2047]
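For reference, a sketch of the byte layout that comment describes (hypothetical helper, not code from this diff):

```c++
#include <cstdint>

// COPY_1_BYTE_OFFSET element: low two bits are 01, bits [2..4] hold len - 4,
// bits [5..7] hold offset >> 8, and the next byte holds offset % 256.
// Valid for len in [4..11] and offset in [0..2047].
inline void EmitCopy1ByteOffset(unsigned len, unsigned offset, std::uint8_t out[2]) {
  out[0] = static_cast<std::uint8_t>(0x01 | ((len - 4) << 2) | ((offset >> 8) << 5));
  out[1] = static_cast<std::uint8_t>(offset & 0xff);
}
```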