added XXH3_64bits_update_dispatch

This commit is contained in:
Yann Collet 2020-06-10 12:12:24 -07:00
parent 7c8b6995ff
commit 21a9a8c995
4 changed files with 54 additions and 15 deletions

View File

@ -74,7 +74,7 @@ default: DEBUGFLAGS=
default: lib xxhsum_and_links
.PHONY: all
all: lib xxhsum xxhsum_inlinedXXH dispatch
all: lib xxhsum xxhsum_inlinedXXH
## xxhsum is the command line interface (CLI)
xxhsum: xxhash.o xxhsum.o
@ -84,6 +84,7 @@ xxhsum32: CFLAGS += -m32 ## generate CLI in 32-bits mode
xxhsum32: xxhash.c xxhsum.c ## do not generate object (avoid mixing different ABI)
$(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT)
## dispatch only works for x86/x64 systems
## this target adds -DXXHSUM_DISPATCH=1 to CPPFLAGS and builds from
## xxhash.o, xxh_x86dispatch.o and xxhsum.c into $@$(EXT)
dispatch: CPPFLAGS += -DXXHSUM_DISPATCH=1
dispatch: xxhash.o xxh_x86dispatch.o xxhsum.c
$(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT)

36
xxh3.h
View File

@ -1478,9 +1478,9 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
XXH_FORCE_INLINE void
XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
const void* XXH_RESTRICT input,
const void* XXH_RESTRICT secret,
XXH3_accWidth_e accWidth)
const void* XXH_RESTRICT input,
const void* XXH_RESTRICT secret,
XXH3_accWidth_e accWidth)
{
XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64* const xacc = (xxh_u64*) acc; /* presumed aligned */
const xxh_u8* const xinput = (const xxh_u8*) input; /* no alignment restriction */
@ -1994,18 +1994,20 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
const xxh_u8* XXH_RESTRICT input, size_t totalStripes,
const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
XXH3_accWidth_e accWidth)
XXH3_accWidth_e accWidth,
XXH3_f_accumulate_512 f_acc512,
XXH3_f_scrambleAcc f_scramble)
{
XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
if (nbStripesPerBlock - *nbStripesSoFarPtr <= totalStripes) {
/* need a scrambling operation */
size_t const nbStripes = nbStripesPerBlock - *nbStripesSoFarPtr;
XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, accWidth, XXH3_accumulate_512);
XXH3_scrambleAcc(acc, secret + secretLimit);
XXH3_accumulate(acc, input + nbStripes * XXH_STRIPE_LEN, secret, totalStripes - nbStripes, accWidth, XXH3_accumulate_512);
XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, accWidth, f_acc512);
f_scramble(acc, secret + secretLimit);
XXH3_accumulate(acc, input + nbStripes * XXH_STRIPE_LEN, secret, totalStripes - nbStripes, accWidth, f_acc512);
*nbStripesSoFarPtr = totalStripes - nbStripes;
} else {
XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, totalStripes, accWidth, XXH3_accumulate_512);
XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, totalStripes, accWidth, f_acc512);
*nbStripesSoFarPtr += totalStripes;
}
}
@ -2014,7 +2016,11 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
* Both XXH3_64bits_update and XXH3_128bits_update use this routine.
*/
XXH_FORCE_INLINE XXH_errorcode
XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_e accWidth)
XXH3_update(XXH3_state_t* state,
const xxh_u8* input, size_t len,
XXH3_accWidth_e accWidth,
XXH3_f_accumulate_512 f_acc512,
XXH3_f_scrambleAcc f_scramble)
{
if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
@ -2050,7 +2056,7 @@ XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_
&state->nbStripesSoFar, state->nbStripesPerBlock,
state->buffer, XXH3_INTERNALBUFFER_STRIPES,
secret, state->secretLimit,
accWidth);
accWidth, f_acc512, f_scramble);
state->bufferedSize = 0;
}
@ -2062,7 +2068,7 @@ XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_
&state->nbStripesSoFar, state->nbStripesPerBlock,
input, XXH3_INTERNALBUFFER_STRIPES,
secret, state->secretLimit,
accWidth);
accWidth, f_acc512, f_scramble);
input += XXH3_INTERNALBUFFER_SIZE;
} while (input<=limit);
/* for last partial stripe */
@ -2081,7 +2087,8 @@ XXH3_update(XXH3_state_t* state, const xxh_u8* input, size_t len, XXH3_accWidth_
/*
 * Streaming update, 64-bit variant.
 * Forwards `input`/`len` to XXH3_update() with the XXH3_acc_64bits width
 * and returns whatever XXH3_update() returns.
 *
 * NOTE(review): two return statements are visible below. The first uses the
 * 4-argument form of XXH3_update(), the second the 6-argument form that also
 * passes XXH3_accumulate_512 and XXH3_scrambleAcc. This looks like the
 * before/after pair of a rendered diff - confirm that only the 6-argument
 * call exists in the real source.
 */
XXH_PUBLIC_API XXH_errorcode
XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
{
return XXH3_update(state, (const xxh_u8*)input, len, XXH3_acc_64bits);
return XXH3_update(state, (const xxh_u8*)input, len,
XXH3_acc_64bits, XXH3_accumulate_512, XXH3_scrambleAcc);
}
@ -2103,7 +2110,7 @@ XXH3_digest_long (XXH64_hash_t* acc,
&nbStripesSoFar, state->nbStripesPerBlock,
state->buffer, nbStripes,
secret, state->secretLimit,
accWidth);
accWidth, XXH3_accumulate_512, XXH3_scrambleAcc);
if (state->bufferedSize % XXH_STRIPE_LEN) { /* one last partial stripe */
XXH3_accumulate_512(acc,
state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
@ -2557,7 +2564,8 @@ XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
/*
 * Streaming update, 128-bit variant.
 * Forwards `input`/`len` to XXH3_update() with the XXH3_acc_128bits width
 * and returns whatever XXH3_update() returns.
 *
 * NOTE(review): two return statements are visible below. The first uses the
 * 4-argument form of XXH3_update(), the second the 6-argument form that also
 * passes XXH3_accumulate_512 and XXH3_scrambleAcc. This looks like the
 * before/after pair of a rendered diff - confirm that only the 6-argument
 * call exists in the real source.
 */
XXH_PUBLIC_API XXH_errorcode
XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
{
return XXH3_update(state, (const xxh_u8*)input, len, XXH3_acc_128bits);
return XXH3_update(state, (const xxh_u8*)input, len,
XXH3_acc_128bits, XXH3_accumulate_512, XXH3_scrambleAcc);
}
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)

View File

@ -409,6 +409,24 @@ static const dispatchFunctions_s k_dispatch[NB_DISPATCHES] = {
/* avx512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512 }
};
/* Pointer to an accumulate_512 routine: (acc, input, secret, accWidth). */
typedef void (*XXH3_dispatchx86_accumulate_512)(
        void* XXH_RESTRICT acc,
        const void* XXH_RESTRICT input,
        const void* XXH_RESTRICT secret,
        XXH3_accWidth_e accWidth);

/* Pointer to a scrambleAcc routine: (acc, secret). */
typedef void (*XXH3_dispatchx86_scrambleAcc)(
        void* XXH_RESTRICT acc,
        const void* XXH_RESTRICT secret);

/* One accumulate/scramble pair, i.e. one row of the table below. */
typedef struct {
    XXH3_dispatchx86_accumulate_512 accumulate_512;
    XXH3_dispatchx86_scrambleAcc    scrambleAcc;
} coreFunctions_s;

/*
 * Currently selected pair. Starts out as { NULL, NULL };
 * setDispatch() overwrites it with k_coreFunc[vecID].
 */
static coreFunctions_s g_coreFunc = { NULL, NULL };

/* Available pairs, indexed by vecID in setDispatch(). */
static const coreFunctions_s k_coreFunc[NB_DISPATCHES] = {
    { XXH3_accumulate_512_scalar, XXH3_scrambleAcc_scalar },  /* scalar */
    { XXH3_accumulate_512_sse2,   XXH3_scrambleAcc_sse2   },  /* sse2   */
    { XXH3_accumulate_512_avx2,   XXH3_scrambleAcc_avx2   },  /* avx2   */
    { XXH3_accumulate_512_avx512, XXH3_scrambleAcc_avx512 }   /* avx512 */
};
static void setDispatch(void)
{
@ -421,6 +439,7 @@ static void setDispatch(void)
assert(vecID != XXH_AVX2);
#endif
g_dispatch = k_dispatch[vecID];
g_coreFunc = k_coreFunc[vecID];
}
@ -465,3 +484,11 @@ XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, cons
{
return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection);
}
/*
 * Streaming update (64-bit width) that goes through the g_coreFunc pair
 * instead of hard-wired accumulate/scramble routines.
 *
 * state : streaming state handed to XXH3_update()
 * input : bytes to consume
 * len   : number of bytes at `input`
 * Returns the XXH_errorcode produced by XXH3_update().
 */
XXH_errorcode
XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len)
{
    const xxh_u8* const bytes = (const xxh_u8*)input;

    /* A NULL accumulate_512 means g_coreFunc has not been filled in yet. */
    if (g_coreFunc.accumulate_512 == NULL) {
        setDispatch();
    }

    return XXH3_update(state, bytes, len,
                       XXH3_acc_64bits,
                       g_coreFunc.accumulate_512,
                       g_coreFunc.scrambleAcc);
}

View File

@ -45,6 +45,7 @@ extern "C" {
XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len);
XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed);
XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen);
/* Streaming update: takes (state, input, len) and returns an XXH_errorcode.
 * NOTE(review): the rename macros further down map XXH3_64bits_update to this
 * name; the guard appears to be XXH_DISPATCH_DISABLE_RENAME - confirm. */
XXH_errorcode XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len);
/* automatic replacement of XXH3 functions.
@ -57,6 +58,8 @@ XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, cons
# define XXH3_64bits_withSeed XXH3_64bits_withSeed_dispatch
# undef XXH3_64bits_withSecret
# define XXH3_64bits_withSecret XXH3_64bits_withSecret_dispatch
# undef XXH3_64bits_update
# define XXH3_64bits_update XXH3_64bits_update_dispatch
#endif /* XXH_DISPATCH_DISABLE_RENAME */