/*
 * Patch summary (preamble text; everything from the first "diff --git"
 * header onward is the unmodified patch).
 *
 * Purpose: let the streaming update path of XXH3 run with accumulate /
 * scramble kernels chosen at run time by the x86 dispatcher.
 *
 * Makefile
 *   - Removes `dispatch` from the prerequisites of the `all` target.
 *   - Adds a comment above the `dispatch` target stating that it only
 *     works for x86/x64 systems.
 *
 * xxh3.h
 *   - XXH3_accumulate_512_scalar: the three continuation parameter lines are
 *     removed and re-added with the same tokens; presumably an
 *     alignment-only change -- confirm against the target file.
 *   - XXH3_consumeStripes: gains two trailing parameters,
 *     `XXH3_f_accumulate_512 f_acc512` and `XXH3_f_scrambleAcc f_scramble`.
 *     `f_acc512` is passed to XXH3_accumulate() where XXH3_accumulate_512 was
 *     passed before, and `f_scramble(acc, secret + secretLimit)` replaces the
 *     direct XXH3_scrambleAcc() call.
 *   - XXH3_update: gains the same two parameters and forwards them to both
 *     XXH3_consumeStripes() call sites shown in the hunks.
 *   - XXH3_64bits_update, XXH3_128bits_update and XXH3_digest_long: pass
 *     XXH3_accumulate_512 and XXH3_scrambleAcc for the new parameters.
 *
 * xxh_x86dispatch.c
 *   - Adds function-pointer typedefs XXH3_dispatchx86_accumulate_512 and
 *     XXH3_dispatchx86_scrambleAcc, and a struct `coreFunctions_s` holding
 *     one pointer of each kind.
 *   - Adds `g_coreFunc` (initialised to { NULL, NULL }) and the constant
 *     table `k_coreFunc[NB_DISPATCHES]` with rows labelled scalar, sse2,
 *     avx2 and avx512.
 *   - setDispatch(): additionally assigns `g_coreFunc = k_coreFunc[vecID]`.
 *   - Adds XXH3_64bits_update_dispatch(): calls setDispatch() when
 *     `g_coreFunc.accumulate_512` is NULL, then returns XXH3_update() with
 *     XXH3_acc_64bits and the two pointers stored in g_coreFunc.
 *
 * xxh_x86dispatch.h
 *   - Declares XXH3_64bits_update_dispatch().
 *   - In the section closed by `#endif` (XXH_DISPATCH_DISABLE_RENAME),
 *     undefines XXH3_64bits_update and redefines it as
 *     XXH3_64bits_update_dispatch.
 *
 * NOTE(review): the lazy `== NULL` initialisation in
 *   XXH3_64bits_update_dispatch() has no synchronisation visible in this
 *   patch -- confirm whether concurrent first calls need to be supported.
 * NOTE(review): XXH3_digest_long still passes the compile-time
 *   XXH3_accumulate_512 / XXH3_scrambleAcc, and no dispatching variant of
 *   XXH3_128bits_update is added here -- presumably intentional; confirm.
 */
diff --git a/Makefile b/Makefile index 516b31e..3edd250 100644 --- a/Makefile +++ b/Makefile @@ -74,7 +74,7 @@ default: DEBUGFLAGS= default: lib xxhsum_and_links .PHONY: all -all: lib xxhsum xxhsum_inlinedXXH dispatch +all: lib xxhsum xxhsum_inlinedXXH ## xxhsum is the command line interface (CLI) xxhsum: xxhash.o xxhsum.o @@ -84,6 +84,7 @@ xxhsum32: CFLAGS += -m32 ## generate CLI in 32-bits mode xxhsum32: xxhash.c xxhsum.c ## do not generate object (avoid mixing different ABI) $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) +## dispatch only works for x86/x64 systems dispatch: CPPFLAGS += -DXXHSUM_DISPATCH=1 dispatch: xxhash.o xxh_x86dispatch.o xxhsum.c $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT) diff --git a/xxh3.h b/xxh3.h index 2d9c269..d6291e9 100644 --- a/xxh3.h +++ b/xxh3.h @@ -1478,9 +1478,9 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret) XXH_FORCE_INLINE void XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc, - const void* XXH_RESTRICT input, - const void* XXH_RESTRICT secret, - XXH3_accWidth_e accWidth) + const void* XXH_RESTRICT input, + const void* XXH_RESTRICT secret, + XXH3_accWidth_e accWidth) { XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */ const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */ @@ -1994,18 +1994,20 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock, const xxh_u8* XXH_RESTRICT input, size_t totalStripes, const xxh_u8* XXH_RESTRICT secret, size_t secretLimit, - XXH3_accWidth_e accWidth) + XXH3_accWidth_e accWidth, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) { XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock); if (nbStripesPerBlock - *nbStripesSoFarPtr <= totalStripes) { /* need a scrambling operation */ size_t const nbStripes = nbStripesPerBlock - *nbStripesSoFarPtr; - XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, 
nbStripes, accWidth, XXH3_accumulate_512); - XXH3_scrambleAcc(acc, secret + secretLimit); - XXH3_accumulate(acc, input + nbStripes * XXH_STRIPE_LEN, secret, totalStripes - nbStripes, accWidth, XXH3_accumulate_512); + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, accWidth, f_acc512); + f_scramble(acc, secret + secretLimit); + XXH3_accumulate(acc, input + nbStripes * XXH_STRIPE_LEN, secret, totalStripes - nbStripes, accWidth, f_acc512); *nbStripesSoFarPtr = totalStripes - nbStripes; } else { - XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, totalStripes, accWidth, XXH3_accumulate_512); + XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, totalStripes, accWidth, f_acc512); *nbStripesSoFarPtr += totalStripes; } } @@ -2014,7 +2016,11 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc, * Both XXH3_64bits_update and XXH3_128bits_update use this routine. */ XXH_FORCE_INLINE XXH_errorcode -XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_e accWidth) +XXH3_update(XXH3_state_t* state, + const xxh_u8* input, size_t len, + XXH3_accWidth_e accWidth, + XXH3_f_accumulate_512 f_acc512, + XXH3_f_scrambleAcc f_scramble) { if (input==NULL) #if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1) @@ -2050,7 +2056,7 @@ XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_ &state->nbStripesSoFar, state->nbStripesPerBlock, state->buffer, XXH3_INTERNALBUFFER_STRIPES, secret, state->secretLimit, - accWidth); + accWidth, f_acc512, f_scramble); state->bufferedSize = 0; } @@ -2062,7 +2068,7 @@ XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_ &state->nbStripesSoFar, state->nbStripesPerBlock, input, XXH3_INTERNALBUFFER_STRIPES, secret, state->secretLimit, - accWidth); + accWidth, f_acc512, f_scramble); input += XXH3_INTERNALBUFFER_SIZE; } while (input<=limit); /* for 
last partial stripe */ @@ -2081,7 +2087,8 @@ XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_ XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len) { - return XXH3_update(state, (const xxh_u8*)input, len, XXH3_acc_64bits); + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_acc_64bits, XXH3_accumulate_512, XXH3_scrambleAcc); } @@ -2103,7 +2110,7 @@ XXH3_digest_long (XXH64_hash_t* acc, &nbStripesSoFar, state->nbStripesPerBlock, state->buffer, nbStripes, secret, state->secretLimit, - accWidth); + accWidth, XXH3_accumulate_512, XXH3_scrambleAcc); if (state->bufferedSize % XXH_STRIPE_LEN) { /* one last partial stripe */ XXH3_accumulate_512(acc, state->buffer + state->bufferedSize - XXH_STRIPE_LEN, @@ -2557,7 +2564,8 @@ XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed) XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len) { - return XXH3_update(state, (const xxh_u8*)input, len, XXH3_acc_128bits); + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_acc_128bits, XXH3_accumulate_512, XXH3_scrambleAcc); } XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state) diff --git a/xxh_x86dispatch.c b/xxh_x86dispatch.c index dd99ec7..950ffe3 100644 --- a/xxh_x86dispatch.c +++ b/xxh_x86dispatch.c @@ -409,6 +409,24 @@ static const dispatchFunctions_s k_dispatch[NB_DISPATCHES] = { /* avx512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512 } }; +typedef void (*XXH3_dispatchx86_accumulate_512)(void* XXH_RESTRICT acc, const void* XXH_RESTRICT input, const void* XXH_RESTRICT secret, XXH3_accWidth_e accWidth); +typedef void (*XXH3_dispatchx86_scrambleAcc)(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret); + +typedef struct { + XXH3_dispatchx86_accumulate_512 accumulate_512; + XXH3_dispatchx86_scrambleAcc scrambleAcc; +} coreFunctions_s; + +static coreFunctions_s g_coreFunc = { 
NULL, NULL }; + +static const coreFunctions_s k_coreFunc[NB_DISPATCHES] = { + /* scalar */ { XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar }, + /* sse2 */ { XXH3_accumulate_512_sse2, XXH3_scrambleAcc_sse2 }, + /* avx2 */ { XXH3_accumulate_512_avx2, XXH3_scrambleAcc_avx2 }, + /* avx512 */ { XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512 }, +}; + + static void setDispatch(void) { @@ -421,6 +439,7 @@ static void setDispatch(void) assert(vecID != XXH_AVX2); #endif g_dispatch = k_dispatch[vecID]; + g_coreFunc = k_coreFunc[vecID]; } @@ -465,3 +484,11 @@ XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, cons { return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection); } + +XXH_errorcode +XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) +{ + if (g_coreFunc.accumulate_512 == NULL) setDispatch(); + return XXH3_update(state, (const xxh_u8*)input, len, + XXH3_acc_64bits, g_coreFunc.accumulate_512, g_coreFunc.scrambleAcc); +} diff --git a/xxh_x86dispatch.h b/xxh_x86dispatch.h index d86f89f..d08c680 100644 --- a/xxh_x86dispatch.h +++ b/xxh_x86dispatch.h @@ -45,6 +45,7 @@ extern "C" { XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len); XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed); XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen); +XXH_errorcode XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len); /* automatic replacement of XXH3 functions. @@ -57,6 +58,8 @@ XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, cons # define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch # undef XXH3_64bits_withSecret # define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch +# undef XXH3_64bits_update +# define XXH3_64bits_update XXH3_64bits_update_dispatch #endif /* XXH_DISPATCH_DISABLE_RENAME */