mirror of
https://github.com/FEX-Emu/xxHash.git
synced 2024-11-23 22:49:39 +00:00
Merge pull request #387 from Cyan4973/x86dispatch_table
Intel vector instruction dispatcher
This commit is contained in:
commit
afed27cc26
1
.gitignore
vendored
1
.gitignore
vendored
@ -16,6 +16,7 @@ xxhsum
|
||||
xxhsum32
|
||||
xxhsum_privateXXH
|
||||
xxhsum_inlinedXXH
|
||||
dispatch
|
||||
tests/generate_unicode_test
|
||||
|
||||
# compilation chain
|
||||
|
16
.travis.yml
16
.travis.yml
@ -20,6 +20,8 @@ matrix:
|
||||
- cppcheck
|
||||
script:
|
||||
- make -B test-all
|
||||
- make clean
|
||||
- make dispatch
|
||||
|
||||
- name: Check results consistency on x64
|
||||
arch: amd64
|
||||
@ -30,7 +32,7 @@ matrix:
|
||||
- make clean
|
||||
- CPPFLAGS="-mavx2 -DXXH_VECTOR=2" make check # AVX2 code path
|
||||
- make clean
|
||||
- CPPFLAGS="-mavx512f -DXXH_VECTOR=5" make check # AVX512 code path
|
||||
- CPPFLAGS="-mavx512f -DXXH_VECTOR=3" make check # AVX512 code path
|
||||
- make clean
|
||||
- CPPFLAGS=-DXXH_REROLL=1 make check # reroll code path (#240)
|
||||
- make -C tests/bench
|
||||
@ -50,7 +52,7 @@ matrix:
|
||||
- CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=0 LDFLAGS=-static RUN_ENV=qemu-arm-static make check # Scalar code path
|
||||
- make clean
|
||||
# NEON (32-bit)
|
||||
- CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=3 CFLAGS="-O3 -march=armv7-a -fPIC -mfloat-abi=softfp -mfpu=neon-vfpv4" LDFLAGS=-static RUN_ENV=qemu-arm-static make check # NEON code path
|
||||
- CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=4 CFLAGS="-O3 -march=armv7-a -fPIC -mfloat-abi=softfp -mfpu=neon-vfpv4" LDFLAGS=-static RUN_ENV=qemu-arm-static make check # NEON code path
|
||||
|
||||
- name: aarch64 compilation and consistency checks
|
||||
dist: xenial
|
||||
@ -60,13 +62,13 @@ matrix:
|
||||
- CPPFLAGS=-DXXH_VECTOR=0 make check # Scalar code path
|
||||
# NEON (64-bit)
|
||||
- make clean
|
||||
- CPPFLAGS=-DXXH_VECTOR=3 make check # NEON code path
|
||||
- CPPFLAGS=-DXXH_VECTOR=4 make check # NEON code path
|
||||
# clang
|
||||
- make clean
|
||||
- CC=clang CPPFLAGS=-DXXH_VECTOR=0 make check # Scalar code path
|
||||
# clang + NEON
|
||||
- make clean
|
||||
- CC=clang CPPFLAGS=-DXXH_VECTOR=3 make check # NEON code path
|
||||
- CC=clang CPPFLAGS=-DXXH_VECTOR=4 make check # NEON code path
|
||||
|
||||
# We need Bionic here because the QEMU versions shipped in the older repos
|
||||
# do not support POWER8 emulation, and compiling QEMU from source is a pain.
|
||||
@ -88,7 +90,7 @@ matrix:
|
||||
- CC=powerpc64-linux-gnu-gcc RUN_ENV=qemu-ppc64-static CPPFLAGS=-DXXH_VECTOR=0 CFLAGS="-O3" LDFLAGS="-static -m64" make check # Scalar code path
|
||||
- make clean
|
||||
# VSX code
|
||||
- CC=powerpc64-linux-gnu-gcc RUN_ENV="qemu-ppc64-static -cpu power8" CFLAGS="-O3 -maltivec -mvsx -mcpu=power8 -mpower8-vector" LDFLAGS="-static -m64" make check # Auto code path
|
||||
- CC=powerpc64-linux-gnu-gcc RUN_ENV="qemu-ppc64-static -cpu power8" CPPFLAGS=-DXXH_VECTOR=5 CFLAGS="-O3 -maltivec -mvsx -mcpu=power8 -mpower8-vector" LDFLAGS="-static -m64" make check # Auto code path
|
||||
- make clean
|
||||
|
||||
- name: PPC64LE compilation and consistency checks
|
||||
@ -99,7 +101,7 @@ matrix:
|
||||
- CPPFLAGS=-DXXH_VECTOR=0 LDFLAGS=-static make check
|
||||
# VSX code path (64-bit)
|
||||
- make clean
|
||||
- CPPFLAGS=-DXXH_VECTOR=4 CFLAGS="-O3 -maltivec -mvsx -mpower8-vector -mcpu=power8" LDFLAGS="-static" make check
|
||||
- CPPFLAGS=-DXXH_VECTOR=5 CFLAGS="-O3 -maltivec -mvsx -mpower8-vector -mcpu=power8" LDFLAGS="-static" make check
|
||||
|
||||
- name: IBM s390x compilation and consistency checks
|
||||
dist: bionic
|
||||
@ -109,7 +111,7 @@ matrix:
|
||||
- CPPFLAGS=-DXXH_VECTOR=0 LDFLAGS=-static make check
|
||||
# s390x code path (64-bit)
|
||||
- make clean
|
||||
- CPPFLAGS=-DXXH_VECTOR=4 CFLAGS="-O3 -march=arch11 -mzvector" LDFLAGS="-static" make check
|
||||
- CPPFLAGS=-DXXH_VECTOR=5 CFLAGS="-O3 -march=arch11 -mzvector" LDFLAGS="-static" make check
|
||||
|
||||
- name: cmake build test
|
||||
script:
|
||||
|
25
Makefile
25
Makefile
@ -40,8 +40,8 @@ DEBUGFLAGS+=-Wall -Wextra -Wconversion -Wcast-qual -Wcast-align -Wshadow \
|
||||
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
|
||||
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
|
||||
-Wredundant-decls -Wstrict-overflow=2
|
||||
CFLAGS += $(DEBUGFLAGS)
|
||||
FLAGS = $(CFLAGS) $(CPPFLAGS) $(MOREFLAGS)
|
||||
CFLAGS += $(DEBUGFLAGS) $(MOREFLAGS)
|
||||
FLAGS = $(CFLAGS) $(CPPFLAGS)
|
||||
XXHSUM_VERSION = $(LIBVER)
|
||||
|
||||
# Define *.exe as extension for Windows systems
|
||||
@ -68,25 +68,34 @@ endif
|
||||
LIBXXH = libxxhash.$(SHARED_EXT_VER)
|
||||
|
||||
|
||||
## generate CLI and libraries in release mode (default for `make`)
|
||||
.PHONY: default
|
||||
default: ## generate CLI and libraries in release mode (default for `make`)
|
||||
default: DEBUGFLAGS=
|
||||
default: lib xxhsum_and_links
|
||||
|
||||
.PHONY: all
|
||||
all: lib xxhsum xxhsum_inlinedXXH
|
||||
|
||||
xxhsum: xxhash.o xxhsum.o ## generate command line interface (CLI)
|
||||
## xxhsum is the command line interface (CLI)
|
||||
ifeq ($(DISPATCH),1)
|
||||
xxhsum: CPPFLAGS += -DXXHSUM_DISPATCH=1
|
||||
xxhsum: xxh_x86dispatch.o
|
||||
endif
|
||||
xxhsum: xxhash.o xxhsum.o
|
||||
$(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT)
|
||||
|
||||
xxhsum32: CFLAGS += -m32 ## generate CLI in 32-bits mode
|
||||
xxhsum32: xxhash.c xxhsum.c ## do not generate object (avoid mixing different ABI)
|
||||
$(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT)
|
||||
|
||||
## dispatch only works for x86/x64 systems
|
||||
dispatch: CPPFLAGS += -DXXHSUM_DISPATCH=1
|
||||
dispatch: xxhash.o xxh_x86dispatch.o xxhsum.c
|
||||
$(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT)
|
||||
|
||||
xxhash.o: xxhash.c xxhash.h xxh3.h
|
||||
$(CC) $(FLAGS) -c $< -o $@
|
||||
xxhsum.o: xxhsum.c xxhash.h
|
||||
$(CC) $(FLAGS) -c $< -o $@
|
||||
xxhsum.o: xxhsum.c xxhash.h xxh3.h xxh_x86dispatch.h
|
||||
xxh_x86dispatch.o: xxh_x86dispatch.c xxh_x86dispatch.h xxhash.h xxh3.h
|
||||
|
||||
.PHONY: xxhsum_and_links
|
||||
xxhsum_and_links: xxhsum xxh32sum xxh64sum xxh128sum
|
||||
@ -147,7 +156,7 @@ help: ## list documented targets
|
||||
clean: ## remove all build artifacts
|
||||
@$(RM) -r *.dSYM # Mac OS-X specific
|
||||
@$(RM) core *.o *.$(SHARED_EXT) *.$(SHARED_EXT).* *.a libxxhash.pc
|
||||
@$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT)
|
||||
@$(RM) xxhsum$(EXT) xxhsum32$(EXT) xxhsum_inlinedXXH$(EXT) dispatch$(EXT)
|
||||
@$(RM) xxh32sum$(EXT) xxh64sum$(EXT) xxh128sum$(EXT)
|
||||
@echo cleaning completed
|
||||
|
||||
|
14
README.md
14
README.md
@ -79,11 +79,11 @@ which can be observed in the following graphs:
|
||||
To access these new prototypes, one needs to unlock their declaration, using the build macro `XXH_STATIC_LINKING_ONLY`.
|
||||
|
||||
The algorithm is currently in development, meaning its return values might still change in future versions.
|
||||
However, the API is stable, and can be used in production, typically for ephemeral
|
||||
data (produced and consumed in same session).
|
||||
However, the API is stable, and can be used in production,
|
||||
typically for generation of ephemeral hashes (produced and consumed in same session).
|
||||
|
||||
Since `v0.7.3`, `XXH3` has reached "release candidate" status,
|
||||
meaning that, if everything remains fine, its current format will be "frozen" and become the final one.
|
||||
`XXH3` has now reached "release candidate" status.
|
||||
If everything remains fine, its format will be "frozen" and become final.
|
||||
After which, return values of `XXH3` and `XXH128` will no longer change in future versions.
|
||||
`XXH3`'s return values will be officially finalized upon reaching `v0.8.0`.
|
||||
|
||||
@ -114,9 +114,10 @@ The following macros can be set at compilation time to modify libxxhash's behavi
|
||||
when running on architectures unable to load memory from unaligned addresses, or suffering a performance penalty from it.
|
||||
It is (slightly) detrimental on platforms with good unaligned memory access performance (same instruction for both aligned and unaligned accesses).
|
||||
This option is automatically disabled on `x86`, `x64` and `aarch64`, and enabled on all other platforms.
|
||||
- `XXH_VECTOR` : manually select a vector instruction set (default: auto-selected at compilation time). `0`==`scalar`, `1`==`sse2`, `2`==`avx2`, `3`==`avx512`, `4`==`neon`, `5`==`vsx`
|
||||
- `XXH_NO_PREFETCH` : disable prefetching. XXH3 only.
|
||||
- `XXH_PREFETCH_DIST` : select prefetching distance. XXH3 only.
|
||||
- `XXH_NO_INLINE_HINTS`: By default, xxHash uses tricks like `__attribute__((always_inline))` and `__forceinline` to improve performance at the cost of code size.
|
||||
- `XXH_NO_INLINE_HINTS`: By default, xxHash uses `__attribute__((always_inline))` and `__forceinline` to improve performance at the cost of code size.
|
||||
Defining this macro to 1 will mark all internal functions as `static`, allowing the compiler to decide whether to inline a function or not.
|
||||
This is very useful when optimizing for smallest binary size,
|
||||
and is automatically defined when compiling with `-O0`, `-Os`, `-Oz`, or `-fno-inline` on GCC and Clang.
|
||||
@ -137,6 +138,9 @@ The following macros can be set at compilation time to modify libxxhash's behavi
|
||||
It's possible to skip auto-detection and simply state that the architecture is little-endian by setting this macro to 1.
|
||||
Setting it to 0 states big-endian.
|
||||
|
||||
For the Command Line Interface `xxhsum`, the following environment variables can also be set :
|
||||
- `DISPATCH=1` : use `xxh_x86dispatch.c`, to automatically select between `scalar`, `sse2`, `avx2` or `avx512` instruction set at runtime, depending on local host. This option is only valid for `x86`/`x64` systems.
|
||||
|
||||
|
||||
### Building xxHash - Using vcpkg
|
||||
|
||||
|
662
xxh_x86dispatch.c
Normal file
662
xxh_x86dispatch.c
Normal file
@ -0,0 +1,662 @@
|
||||
/*
|
||||
* xxHash - Extremely Fast Hash algorithm
|
||||
* Copyright (C) 2020 Yann Collet
|
||||
*
|
||||
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* You can contact the author at:
|
||||
* - xxHash homepage: https://www.xxhash.com
|
||||
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* Dispatcher code for XXH3 on x86-based targets.
|
||||
*/
|
||||
/* Refuse to build for anything that is not an x86 / x86_64 target. */
#if !defined(__x86_64__) && !defined(__i386__) && !defined(_M_IX86) && !defined(_M_X64)
#  error "Dispatching is currently only supported on x86 and x86_64."
#endif

/* The per-ISA variants in this file are built with __attribute__((__target__(...))). */
#if !defined(__GNUC__)
#  error "Dispatching requires __attribute__((__target__)) capability"
#endif

#define XXH_DISPATCH_AVX2    /* enable dispatch towards AVX2 */
#define XXH_DISPATCH_AVX512  /* enable dispatch towards AVX512 */
|
||||
|
||||
/*
 * Debug support.
 *
 * With XXH_DISPATCH_DEBUG defined, XXH_debugPrint() writes a message to stderr
 * and assert() stays active. Otherwise XXH_debugPrint() expands to a no-op and
 * NDEBUG is defined before <assert.h> is included, which compiles assert() out.
 */
#ifdef XXH_DISPATCH_DEBUG
/* debug logging */
#  include <stdio.h>
/* do { } while (0) makes the macro behave as a single statement, so it can be
 * used safely as the body of an un-braced if/else. */
#  define XXH_debugPrint(str) do { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", str); fflush(NULL); } while (0)
#else
#  define XXH_debugPrint(str) ((void)0)
/* Only define NDEBUG when the build has not already done so (e.g. -DNDEBUG),
 * to avoid an incompatible macro redefinition. */
#  ifndef NDEBUG
#    define NDEBUG
#  endif
#endif
#include <assert.h>
|
||||
|
||||
#if defined(__GNUC__)
|
||||
# include <immintrin.h> /* avx2 (and later) */
|
||||
# include <emmintrin.h> /* sse2 */
|
||||
#elif defined(_MSC_VER)
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
/*
 * Configuration macros, defined ahead of the inclusion of "xxhash.h" that
 * follows. NOTE(review): they are presumably consumed by xxhash.h; nothing in
 * this file tests them directly.
 */
#define XXH_X86DISPATCH
#define XXH_INLINE_ALL

/* Function attributes that compile a single function for a given instruction set. */
#define XXH_TARGET_SSE2   __attribute__((__target__("sse2")))
#define XXH_TARGET_AVX2   __attribute__((__target__("avx2")))
#define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
|
||||
#include "xxhash.h"
|
||||
|
||||
/*
|
||||
* Modified version of Intel's guide
|
||||
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
|
||||
*/
|
||||
#if defined(_MSC_VER)
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Support both AT&T and Intel dialects
|
||||
*
|
||||
* GCC doesn't convert AT&T syntax to Intel syntax, and will error out if
|
||||
* compiled with -masm=intel. Instead, it supports dialect switching with
|
||||
* curly braces: { AT&T syntax | Intel syntax }
|
||||
*
|
||||
* Clang's integrated assembler automatically converts AT&T syntax to Intel if
|
||||
* needed, making the dialect switching useless (it isn't even supported).
|
||||
*
|
||||
* Note: Comments are written in the inline assembly itself.
|
||||
*/
|
||||
/*
 * I_ATT(intel, att): emit one inline-assembly line, terminated by "\n\t".
 * Clang receives only the AT&T text; other compilers receive both spellings
 * wrapped in the "{att|intel}" dialect-alternative syntax.
 */
#if !defined(__clang__)
#  define I_ATT(intel, att) "{" att "|" intel "}\n\t"
#else
#  define I_ATT(intel, att) att "\n\t"
#endif
|
||||
|
||||
|
||||
/*
 * Executes CPUID with the given leaf (eax) and sub-leaf (ecx) and stores the
 * resulting registers into abcd[0..3] in the order EAX, EBX, ECX, EDX.
 *
 * abcd must point to at least 4 writable xxh_u32 values.
 */
static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
{
#if defined(_MSC_VER)
    /* __cpuidex() is declared with int parameters: cast explicitly rather
     * than passing unsigned values through mismatched pointer/integer types. */
    __cpuidex((int*)abcd, (int)eax, (int)ecx);
#else
    xxh_u32 ebx, edx;
# if defined(__i386__) && defined(__PIC__)
    /* The opening of the asm statement differs per configuration; the shared
     * tail (remaining operands and closing parenthesis) follows the #endif. */
    __asm__(
        "# Call CPUID\n\t"
        "#\n\t"
        "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
        "# EBX, so we use EDI instead.\n\t"
        I_ATT("mov edi, ebx", "movl %%ebx, %%edi")
        I_ATT("cpuid", "cpuid" )
        I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi")
        : "=D" (ebx),
# else
    __asm__(
        "# Call CPUID\n\t"
        I_ATT("cpuid", "cpuid")
        : "=b" (ebx),
# endif
          "+a" (eax), "+c" (ecx), "=d" (edx));
    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}
|
||||
|
||||
#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512)
|
||||
/*
|
||||
* While the CPU may support AVX2, the operating system might not properly save
|
||||
* the full YMM/ZMM registers.
|
||||
*
|
||||
* xgetbv is used for detecting this: Any compliant operating system will define
|
||||
* a set of flags in the xcr0 register indicating how it saves the AVX registers.
|
||||
*
|
||||
* You can manually disable this flag on Windows by running, as admin:
|
||||
*
|
||||
* bcdedit.exe /set xsavedisable 1
|
||||
*
|
||||
* and rebooting. Run the same command with 0 to re-enable it.
|
||||
*/
|
||||
static xxh_u64 XXH_xgetbv(void)
|
||||
{
|
||||
#if defined(_MSC_VER)
|
||||
return _xgetbv(0); /* min VS2010 SP1 compiler is required */
|
||||
#else
|
||||
xxh_u32 xcr0_lo, xcr0_hi;
|
||||
__asm__(
|
||||
"# Call XGETBV\n\t"
|
||||
"#\n\t"
|
||||
"# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t"
|
||||
"# the XGETBV opcode, so we encode it by hand instead.\n\t"
|
||||
"# See <https://github.com/asmjit/asmjit/issues/78> for details.\n\t"
|
||||
".byte 0x0f, 0x01, 0xd0\n\t"
|
||||
: "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0));
|
||||
return xcr0_lo | ((xxh_u64)xcr0_hi << 32);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Bit masks applied by XXH_featureTest() to CPUID / XGETBV results. */

/* CPUID leaf 1, EDX (abcd[3]) */
#define SSE2_CPUID_MASK     (1 << 26)
/* CPUID leaf 1, ECX (abcd[2]): both bits must be set */
#define OSXSAVE_CPUID_MASK  ((1 << 26) | (1 << 27))
/* CPUID leaf 7 sub-leaf 0, EBX (abcd[1]) */
#define AVX2_CPUID_MASK     (1 << 5)
#define AVX512F_CPUID_MASK  (1 << 16)
/* XGETBV(0): state components the OS must have enabled */
#define AVX2_XGETBV_MASK    ((1 << 2) | (1 << 1))
#define AVX512F_XGETBV_MASK ((7 << 5) | (1 << 2) | (1 << 1))
|
||||
|
||||
/* Returns the best XXH3 implementation */
|
||||
static int XXH_featureTest(void)
|
||||
{
|
||||
xxh_u32 abcd[4];
|
||||
xxh_u32 max_leaves;
|
||||
int best = XXH_SCALAR;
|
||||
#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512)
|
||||
xxh_u64 xgetbv_val;
|
||||
#endif
|
||||
#if defined(__GNUC__) && defined(__i386__)
|
||||
xxh_u32 cpuid_supported;
|
||||
__asm__(
|
||||
"# For the sake of ruthless backwards compatibility, check if CPUID\n\t"
|
||||
"# is supported in the EFLAGS on i386.\n\t"
|
||||
"# This is not necessary on x86_64 - CPUID is mandatory.\n\t"
|
||||
"# The ID flag (bit 21) in the EFLAGS register indicates support\n\t"
|
||||
"# for the CPUID instruction. If a software procedure can set and\n\t"
|
||||
"# clear this flag, the processor executing the procedure supports\n\t"
|
||||
"# the CPUID instruction.\n\t"
|
||||
"# <https://c9x.me/x86/html/file_module_x86_id_45.html>\n\t"
|
||||
"#\n\t"
|
||||
"# Routine is from <https://wiki.osdev.org/CPUID>.\n\t"
|
||||
|
||||
"# Save EFLAGS\n\t"
|
||||
I_ATT("pushfd", "pushfl" )
|
||||
"# Store EFLAGS\n\t"
|
||||
I_ATT("pushfd", "pushfl" )
|
||||
"# Invert the ID bit in stored EFLAGS\n\t"
|
||||
I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)")
|
||||
"# Load stored EFLAGS (with ID bit inverted)\n\t"
|
||||
I_ATT("popfd", "popfl" )
|
||||
"# Store EFLAGS again (ID bit may or not be inverted)\n\t"
|
||||
I_ATT("pushfd", "pushfl" )
|
||||
"# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t"
|
||||
I_ATT("pop eax", "popl %%eax" )
|
||||
"# eax = whichever bits were changed\n\t"
|
||||
I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" )
|
||||
"# Restore original EFLAGS\n\t"
|
||||
I_ATT("popfd", "popfl" )
|
||||
"# eax = zero if ID bit can't be changed, else non-zero\n\t"
|
||||
I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" )
|
||||
: "=a" (cpuid_supported) :: "cc");
|
||||
|
||||
if (XXH_unlikely(!cpuid_supported)) {
|
||||
XXH_debugPrint("CPUID support is not detected!");
|
||||
return best;
|
||||
}
|
||||
|
||||
#endif
|
||||
/* Check how many CPUID pages we have */
|
||||
XXH_cpuid(0, 0, abcd);
|
||||
max_leaves = abcd[0];
|
||||
|
||||
/* Shouldn't happen on hardware, but happens on some QEMU configs. */
|
||||
if (XXH_unlikely(max_leaves == 0)) {
|
||||
XXH_debugPrint("Max CPUID leaves == 0!");
|
||||
return best;
|
||||
}
|
||||
|
||||
/* Check for SSE2, OSXSAVE and xgetbv */
|
||||
XXH_cpuid(1, 0, abcd);
|
||||
|
||||
/*
|
||||
* Test for SSE2. The check is redundant on x86_64, but it doesn't hurt.
|
||||
*/
|
||||
if (XXH_unlikely((abcd[3] & SSE2_CPUID_MASK) != SSE2_CPUID_MASK))
|
||||
return best;
|
||||
|
||||
XXH_debugPrint("SSE2 support detected.");
|
||||
|
||||
best = XXH_SSE2;
|
||||
#if defined(XXH_DISPATCH_AVX2) || defined(XXH_DISPATCH_AVX512)
|
||||
/* Make sure we have enough leaves */
|
||||
if (XXH_unlikely(max_leaves < 7))
|
||||
return best;
|
||||
|
||||
/* Test for OSXSAVE and XGETBV */
|
||||
if ((abcd[2] & OSXSAVE_CPUID_MASK) != OSXSAVE_CPUID_MASK)
|
||||
return best;
|
||||
|
||||
/* CPUID check for AVX features */
|
||||
XXH_cpuid(7, 0, abcd);
|
||||
|
||||
xgetbv_val = XXH_xgetbv();
|
||||
#if defined(XXH_DISPATCH_AVX2)
|
||||
/* Validate that AVX2 is supported by the CPU */
|
||||
if ((abcd[1] & AVX2_CPUID_MASK) != AVX2_CPUID_MASK)
|
||||
return best;
|
||||
|
||||
/* Validate that the OS supports YMM registers */
|
||||
if ((xgetbv_val & AVX2_XGETBV_MASK) != AVX2_XGETBV_MASK) {
|
||||
XXH_debugPrint("AVX2 supported by the CPU, but not the OS.");
|
||||
return best;
|
||||
}
|
||||
|
||||
/* AVX2 supported */
|
||||
XXH_debugPrint("AVX2 support detected.");
|
||||
best = XXH_AVX2;
|
||||
#endif
|
||||
#if defined(XXH_DISPATCH_AVX512)
|
||||
/* Check if AVX512F is supported by the CPU */
|
||||
if ((abcd[1] & AVX512F_CPUID_MASK) != AVX512F_CPUID_MASK) {
|
||||
XXH_debugPrint("AVX512F not supported by CPU");
|
||||
return best;
|
||||
}
|
||||
|
||||
/* Validate that the OS supports ZMM registers */
|
||||
if ((xgetbv_val & AVX512F_XGETBV_MASK) != AVX512F_XGETBV_MASK) {
|
||||
XXH_debugPrint("AVX512F supported by the CPU, but not the OS.");
|
||||
return best;
|
||||
}
|
||||
|
||||
/* AVX512F supported */
|
||||
XXH_debugPrint("AVX512F support detected.");
|
||||
best = XXH_AVX512;
|
||||
#endif
|
||||
#endif
|
||||
return best;
|
||||
}
|
||||
|
||||
|
||||
/* === Vector implementations === */
|
||||
|
||||
/* === XXH3, default variants === */
|
||||
|
||||
XXH_NO_INLINE XXH64_hash_t
|
||||
XXHL64_default_scalar(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t
|
||||
XXHL64_default_sse2(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t
|
||||
XXHL64_default_avx2(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
|
||||
}
|
||||
|
||||
#ifdef XXH_DISPATCH_AVX512
|
||||
XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t
|
||||
XXHL64_default_avx512(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* === XXH3, Seeded variants === */
|
||||
|
||||
XXH_NO_INLINE XXH64_hash_t
|
||||
XXHL64_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t
|
||||
XXHL64_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t
|
||||
XXHL64_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2);
|
||||
}
|
||||
|
||||
#ifdef XXH_DISPATCH_AVX512
|
||||
XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t
|
||||
XXHL64_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* === XXH3, Secret variants === */
|
||||
|
||||
XXH_NO_INLINE XXH64_hash_t
|
||||
XXHL64_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_SSE2 XXH64_hash_t
|
||||
XXHL64_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_AVX2 XXH64_hash_t
|
||||
XXHL64_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
|
||||
}
|
||||
|
||||
#ifdef XXH_DISPATCH_AVX512
|
||||
XXH_NO_INLINE XXH_TARGET_AVX512 XXH64_hash_t
|
||||
XXHL64_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_64b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* === XXH128 default variants === */
|
||||
|
||||
XXH_NO_INLINE XXH128_hash_t
|
||||
XXHL128_default_scalar(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t
|
||||
XXHL128_default_sse2(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t
|
||||
XXHL128_default_avx2(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
|
||||
}
|
||||
|
||||
#ifdef XXH_DISPATCH_AVX512
|
||||
XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t
|
||||
XXHL128_default_avx512(const void* XXH_RESTRICT input, size_t len)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* === XXH128 Secret variants === */
|
||||
|
||||
XXH_NO_INLINE XXH128_hash_t
|
||||
XXHL128_secret_scalar(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t
|
||||
XXHL128_secret_sse2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t
|
||||
XXHL128_secret_avx2(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2);
|
||||
}
|
||||
|
||||
#ifdef XXH_DISPATCH_AVX512
|
||||
XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t
|
||||
XXHL128_secret_avx512(const void* XXH_RESTRICT input, size_t len, const void* XXH_RESTRICT secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_hashLong_128b_internal(input, len, secret, secretLen,
|
||||
XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* === XXH128 Seeded variants === */
|
||||
|
||||
XXH_NO_INLINE XXH128_hash_t
|
||||
XXHL128_seed_scalar(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar, XXH3_initCustomSecret_scalar);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_SSE2 XXH128_hash_t
|
||||
XXHL128_seed_sse2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2, XXH3_initCustomSecret_sse2);
|
||||
}
|
||||
|
||||
XXH_NO_INLINE XXH_TARGET_AVX2 XXH128_hash_t
|
||||
XXHL128_seed_avx2(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2, XXH3_initCustomSecret_avx2);
|
||||
}
|
||||
|
||||
#ifdef XXH_DISPATCH_AVX512
|
||||
XXH_NO_INLINE XXH_TARGET_AVX512 XXH128_hash_t
|
||||
XXHL128_seed_avx512(const void* XXH_RESTRICT input, size_t len, XXH64_hash_t seed)
|
||||
{
|
||||
return XXH3_hashLong_128b_withSeed_internal(input, len, seed,
|
||||
XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512, XXH3_initCustomSecret_avx512);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* ==== Dispatchers ==== */
|
||||
|
||||
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t);
|
||||
|
||||
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);
|
||||
|
||||
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
|
||||
|
||||
typedef struct {
|
||||
XXH3_dispatchx86_hashLong64_default hashLong64_default;
|
||||
XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed;
|
||||
XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret;
|
||||
} dispatchFunctions_s;
|
||||
|
||||
static dispatchFunctions_s g_dispatch = { NULL, NULL, NULL};
|
||||
|
||||
#define NB_DISPATCHES 4
|
||||
static const dispatchFunctions_s k_dispatch[NB_DISPATCHES] = {
|
||||
/* scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar },
|
||||
/* sse2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2 },
|
||||
/* avx2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2 },
|
||||
/* avx512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512 }
|
||||
};
|
||||
|
||||
typedef void (*XXH3_dispatchx86_accumulate_512)(void* XXH_RESTRICT acc, const void* XXH_RESTRICT input, const void* XXH_RESTRICT secret, XXH3_accWidth_e accWidth);
|
||||
typedef void (*XXH3_dispatchx86_scrambleAcc)(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret);
|
||||
|
||||
typedef struct {
|
||||
XXH3_dispatchx86_accumulate_512 accumulate_512;
|
||||
XXH3_dispatchx86_scrambleAcc scrambleAcc;
|
||||
} coreFunctions_s;
|
||||
|
||||
static coreFunctions_s g_coreFunc = { NULL, NULL };
|
||||
|
||||
static const coreFunctions_s k_coreFunc[NB_DISPATCHES] = {
|
||||
/* scalar */ { XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar },
|
||||
/* sse2 */ { XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2 },
|
||||
/* avx2 */ { XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2 },
|
||||
/* avx512 */ { XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512 },
|
||||
};
|
||||
|
||||
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t);
|
||||
|
||||
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t);
|
||||
|
||||
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t);
|
||||
|
||||
typedef struct {
|
||||
XXH3_dispatchx86_hashLong128_default hashLong128_default;
|
||||
XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed;
|
||||
XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret;
|
||||
} dispatch128Functions_s;
|
||||
|
||||
static dispatch128Functions_s g_dispatch128 = { NULL, NULL, NULL };
|
||||
|
||||
static const dispatch128Functions_s k_dispatch128[NB_DISPATCHES] = {
|
||||
/* scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar },
|
||||
/* sse2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2 },
|
||||
/* avx2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2 },
|
||||
/* avx512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512 }
|
||||
};
|
||||
|
||||
static void setDispatch(void)
|
||||
{
|
||||
int vecID = XXH_featureTest();
|
||||
XXH_STATIC_ASSERT(XXH_AVX512 == NB_DISPATCHES-1);
|
||||
assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512);
|
||||
#ifndef XXH_DISPATCH_AVX512
|
||||
assert(vecID != XXH_AVX512);
|
||||
#endif
|
||||
#ifndef XXH_DISPATCH_AVX2
|
||||
assert(vecID != XXH_AVX2);
|
||||
#endif
|
||||
g_dispatch = k_dispatch[vecID];
|
||||
g_dispatch128 = k_dispatch128[vecID];
|
||||
g_coreFunc = k_coreFunc[vecID];
|
||||
}
|
||||
|
||||
|
||||
/* ==== XXH3 public functions ==== */
|
||||
|
||||
static XXH64_hash_t
|
||||
XXH3_hashLong_64b_defaultSecret_selection(const xxh_u8* input, size_t len,
|
||||
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
|
||||
{
|
||||
(void)seed64; (void)secret; (void)secretLen;
|
||||
if (g_dispatch.hashLong64_default == NULL) setDispatch();
|
||||
return g_dispatch.hashLong64_default(input, len);
|
||||
}
|
||||
|
||||
XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len)
|
||||
{
|
||||
return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection);
|
||||
}
|
||||
|
||||
static XXH64_hash_t
|
||||
XXH3_hashLong_64b_withSeed_selection(const xxh_u8* input, size_t len,
|
||||
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
|
||||
{
|
||||
(void)secret; (void)secretLen;
|
||||
if (g_dispatch.hashLong64_seed == NULL) setDispatch();
|
||||
return g_dispatch.hashLong64_seed(input, len, seed64);
|
||||
}
|
||||
|
||||
/* Seeded 64-bit hash: calls XXH3_64bits_internal() with the caller's seed,
 * the XXH3_kSecret table and the with-seed selection callback. */
XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
{
    XXH64_hash_t const hash =
        XXH3_64bits_internal(input, len,
                             seed, XXH3_kSecret, sizeof(XXH3_kSecret),
                             XXH3_hashLong_64b_withSeed_selection);
    return hash;
}
|
||||
|
||||
static XXH64_hash_t
|
||||
XXH3_hashLong_64b_withSecret_selection(const xxh_u8* input, size_t len,
|
||||
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
|
||||
{
|
||||
(void)seed64;
|
||||
if (g_dispatch.hashLong64_secret == NULL) setDispatch();
|
||||
return g_dispatch.hashLong64_secret(input, len, secret, secretLen);
|
||||
}
|
||||
|
||||
XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
|
||||
}
|
||||
|
||||
XXH_errorcode
|
||||
XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
|
||||
{
|
||||
if (g_coreFunc.accumulate_512 == NULL) setDispatch();
|
||||
return XXH3_update(state, (const xxh_u8*)input, len,
|
||||
XXH3_acc_64bits, g_coreFunc.accumulate_512, g_coreFunc.scrambleAcc);
|
||||
}
|
||||
|
||||
|
||||
/* ==== XXH128 public functions ==== */
|
||||
|
||||
static XXH128_hash_t
|
||||
XXH3_hashLong_128b_defaultSecret_selection(const xxh_u8* input, size_t len,
|
||||
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
|
||||
{
|
||||
(void)seed64; (void)secret; (void)secretLen;
|
||||
if (g_dispatch128.hashLong128_default == NULL) setDispatch();
|
||||
return g_dispatch128.hashLong128_default(input, len);
|
||||
}
|
||||
|
||||
XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len)
|
||||
{
|
||||
return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection);
|
||||
}
|
||||
|
||||
static XXH128_hash_t
|
||||
XXH3_hashLong_128b_withSeed_selection(const xxh_u8* input, size_t len,
|
||||
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
|
||||
{
|
||||
(void)secret; (void)secretLen;
|
||||
if (g_dispatch128.hashLong128_seed == NULL) setDispatch();
|
||||
return g_dispatch128.hashLong128_seed(input, len, seed64);
|
||||
}
|
||||
|
||||
/* Seeded 128-bit hash: calls XXH3_128bits_internal() with the caller's seed,
 * the XXH3_kSecret table and the with-seed selection callback. */
XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
{
    XXH128_hash_t const hash =
        XXH3_128bits_internal(input, len,
                              seed, XXH3_kSecret, sizeof(XXH3_kSecret),
                              XXH3_hashLong_128b_withSeed_selection);
    return hash;
}
|
||||
|
||||
static XXH128_hash_t
|
||||
XXH3_hashLong_128b_withSecret_selection(const xxh_u8* input, size_t len,
|
||||
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen)
|
||||
{
|
||||
(void)seed64;
|
||||
if (g_dispatch128.hashLong128_secret == NULL) setDispatch();
|
||||
return g_dispatch128.hashLong128_secret(input, len, secret, secretLen);
|
||||
}
|
||||
|
||||
XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen)
|
||||
{
|
||||
return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection);
|
||||
}
|
||||
|
||||
XXH_errorcode
|
||||
XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
|
||||
{
|
||||
if (g_coreFunc.accumulate_512 == NULL) setDispatch();
|
||||
return XXH3_update(state, (const xxh_u8*)input, len,
|
||||
XXH3_acc_128bits, g_coreFunc.accumulate_512, g_coreFunc.scrambleAcc);
|
||||
}
|
88
xxh_x86dispatch.h
Normal file
88
xxh_x86dispatch.h
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* xxHash - XXH3 Dispatcher for x86-based targets
|
||||
* Copyright (C) 2020 Yann Collet
|
||||
*
|
||||
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* You can contact the author at:
|
||||
* - xxHash homepage: https://www.xxhash.com
|
||||
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
#ifndef XXH_X86DISPATCH_H_13563687684
|
||||
#define XXH_X86DISPATCH_H_13563687684
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#include "xxhash.h" /* XXH64_hash_t, XXH3_state_t */
|
||||
|
||||
XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len);
|
||||
XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed);
|
||||
XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen);
|
||||
XXH_errorcode XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len);
|
||||
|
||||
XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len);
|
||||
XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed);
|
||||
XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen);
|
||||
XXH_errorcode XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len);
|
||||
|
||||
|
||||
/* automatic replacement of XXH3 functions.
 * can be disabled by setting XXH_DISPATCH_DISABLE_REPLACE
 *
 * Every name is #undef'd immediately before being #define'd, so that a
 * pre-existing macro of the same name (if any) is dropped instead of being
 * redefined with a different body. */
#ifndef XXH_DISPATCH_DISABLE_REPLACE

/* 64-bit variants */
# undef  XXH3_64bits
# define XXH3_64bits XXH3_64bits_dispatch
# undef  XXH3_64bits_withSeed
# define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch
# undef  XXH3_64bits_withSecret
# define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch
# undef  XXH3_64bits_update
# define XXH3_64bits_update XXH3_64bits_update_dispatch

/* 128-bit variants; XXH128 maps onto the seeded 128-bit dispatcher */
# undef  XXH128
# define XXH128 XXH3_128bits_withSeed_dispatch
# undef  XXH3_128bits
# define XXH3_128bits XXH3_128bits_dispatch
# undef  XXH3_128bits_withSeed
# define XXH3_128bits_withSeed XXH3_128bits_withSeed_dispatch
# undef  XXH3_128bits_withSecret
# define XXH3_128bits_withSecret XXH3_128bits_withSecret_dispatch
# undef  XXH3_128bits_update
# define XXH3_128bits_update XXH3_128bits_update_dispatch

#endif /* XXH_DISPATCH_DISABLE_REPLACE */
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* XXH_X86DISPATCH_H_13563687684 */
|
37
xxhash.h
37
xxhash.h
@ -899,26 +899,27 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size)
|
||||
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
|
||||
#endif
|
||||
|
||||
#if XXH_NO_INLINE_HINTS /* disable inlining hints */
|
||||
# define XXH_FORCE_INLINE static
|
||||
# define XXH_NO_INLINE static
|
||||
#elif defined(_MSC_VER) /* Visual Studio */
|
||||
# define XXH_FORCE_INLINE static __forceinline
|
||||
# define XXH_NO_INLINE static __declspec(noinline)
|
||||
#else
|
||||
# if defined (__cplusplus) \
|
||||
|| defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
|
||||
# ifdef __GNUC__
|
||||
# define XXH_FORCE_INLINE static inline __attribute__((always_inline))
|
||||
# define XXH_NO_INLINE static __attribute__((noinline))
|
||||
# else
|
||||
# define XXH_FORCE_INLINE static inline
|
||||
# define XXH_NO_INLINE static
|
||||
# endif
|
||||
#if XXH_NO_INLINE_HINTS /* disable inlining hints */
|
||||
# if defined(__GNUC__)
|
||||
# define XXH_FORCE_INLINE static __attribute__((unused))
|
||||
# else
|
||||
# define XXH_FORCE_INLINE static
|
||||
# define XXH_NO_INLINE static
|
||||
# endif /* __STDC_VERSION__ */
|
||||
# endif
|
||||
# define XXH_NO_INLINE static
|
||||
/* enable inlining hints */
|
||||
#elif defined(_MSC_VER) /* Visual Studio */
|
||||
# define XXH_FORCE_INLINE static __forceinline
|
||||
# define XXH_NO_INLINE static __declspec(noinline)
|
||||
#elif defined(__GNUC__)
|
||||
# define XXH_FORCE_INLINE static __inline__ __attribute__((always_inline, unused))
|
||||
# define XXH_NO_INLINE static __attribute__((noinline))
|
||||
#elif defined (__cplusplus) \
|
||||
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* C99 */
|
||||
# define XXH_FORCE_INLINE static inline
|
||||
# define XXH_NO_INLINE static
|
||||
#else
|
||||
# define XXH_FORCE_INLINE static
|
||||
# define XXH_NO_INLINE static
|
||||
#endif
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user