Merge pull request #308 from Cyan4973/mul32len8test

Last variant for the 4to8 segment (mul32to64)
This commit is contained in:
Yann Collet 2020-02-24 09:52:33 -08:00 committed by GitHub
commit 71f0f6ffd3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 59 additions and 58 deletions

40
xxh3.h
View File

@ -58,6 +58,16 @@
# define XXH_RESTRICT /* disable */
#endif
/*
 * Branch-prediction hints.
 *
 * XXH_likely(x)   : wraps an expression whose value is expected to be 1.
 * XXH_unlikely(x) : wraps an expression whose value is expected to be 0.
 *
 * With GCC >= 3, the Intel compiler >= 800, or clang, both macros expand to
 * __builtin_expect(). With any other compiler they expand to the bare
 * parenthesized expression, so the hint is dropped and only the value remains.
 *
 * NOTE(review): x is handed to __builtin_expect() without normalization and is
 * compared against the literal 1 (likely) or 0 (unlikely), so these macros
 * look intended for conditions that evaluate to exactly 0 or 1, such as
 * comparisons. Confirm before applying them to arbitrary non-zero values
 * or to pointers.
 */
#if (defined(__GNUC__) && (__GNUC__ >= 3)) \
|| (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
|| defined(__clang__)
/* Compiler provides __builtin_expect(): second argument is the expected value. */
# define XXH_likely(x) __builtin_expect(x, 1)
# define XXH_unlikely(x) __builtin_expect(x, 0)
#else
/* Fallback: no hint is emitted, the expression is evaluated as-is. */
# define XXH_likely(x) (x)
# define XXH_unlikely(x) (x)
#endif
#if defined(__GNUC__)
# if defined(__AVX2__)
# include <immintrin.h>
@ -563,7 +573,7 @@ XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
{
h64 ^= h64 >> 37;
h64 *= PRIME64_3;
h64 *= 0x165667919E3779F9ULL;
h64 ^= h64 >> 32;
return h64;
}
@ -582,7 +592,7 @@ XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
{ xxh_u8 const c1 = input[0];
xxh_u8 const c2 = input[len >> 1];
xxh_u8 const c3 = input[len - 1];
xxh_u32 const combined = ((xxh_u32)c1) | (((xxh_u32)c2) << 8) | (((xxh_u32)c3) << 16) | (((xxh_u32)len) << 24);
xxh_u32 const combined = ((xxh_u32)c1<<16) | (((xxh_u32)c2) << 24) | (((xxh_u32)c3) << 0) | (((xxh_u32)len) << 8);
xxh_u64 const keyed = (xxh_u64)combined ^ (XXH_readLE32(secret) + seed);
xxh_u64 const mixed = keyed * PRIME64_1;
return XXH3_avalanche(mixed);
@ -594,13 +604,17 @@ XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
{
XXH_ASSERT(input != NULL);
XXH_ASSERT(secret != NULL);
XXH_ASSERT(4 <= len && len <= 8);
{ xxh_u32 const input_lo = XXH_readLE32(input);
xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
xxh_u64 const input_64 = input_lo | ((xxh_u64)input_hi << 32);
xxh_u64 const keyed = input_64 ^ (XXH_readLE64(secret) + seed);
xxh_u64 const mix64 = len + ((keyed ^ (keyed >> 51)) * PRIME32_1);
return XXH3_avalanche((mix64 ^ (mix64 >> 47)) * PRIME64_2);
XXH_ASSERT(4 <= len && len < 8);
seed ^= seed << 32;
{ xxh_u32 const input1 = XXH_readLE32(input);
xxh_u32 const input2 = XXH_readLE32(input + len - 4);
xxh_u32 const key1 = XXH_swap32(input1) ^ ((xxh_u32)(seed >> 32) + XXH_readLE32(secret));
xxh_u32 const key2 = input2 ^ (XXH_readLE32(secret+4) - (xxh_u32)seed);
xxh_u64 const mix = XXH_mult32to64(key1, key2)
+ ((xxh_u64)input1 << 32)
+ ((xxh_u64)(XXH_rotl32(input2,23)) << 32)
+ len;
return XXH3_avalanche(mix ^ (mix >> 59));
}
}
@ -609,8 +623,8 @@ XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
{
XXH_ASSERT(input != NULL);
XXH_ASSERT(secret != NULL);
XXH_ASSERT(9 <= len && len <= 16);
{ xxh_u64 const input_lo = XXH_readLE64(input) ^ (XXH_readLE64(secret) + seed);
XXH_ASSERT(8 <= len && len <= 16);
{ xxh_u64 const input_lo = XXH_readLE64(input) ^ XXH_readLE64(secret);
xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ (XXH_readLE64(secret + 8) - seed);
xxh_u64 const acc = len + (input_lo + input_hi) + XXH3_mul128_fold64(input_lo, input_hi);
return XXH3_avalanche(acc);
@ -621,8 +635,8 @@ XXH_FORCE_INLINE XXH64_hash_t
XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
{
XXH_ASSERT(len <= 16);
{ if (len > 8) return XXH3_len_9to16_64b(input, len, secret, seed);
if (len >= 4) return XXH3_len_4to8_64b(input, len, secret, seed);
{ if (XXH_likely(len > 8)) return XXH3_len_9to16_64b(input, len, secret, seed);
if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
return XXH3_avalanche((PRIME64_1 + seed) ^ XXH_readLE64(secret));
}

View File

@ -687,7 +687,7 @@ static void BMK_testXXH3(const void* data, size_t len, U64 seed, U64 Nresult)
} }
}
static void BMK_testXXH3_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, U64 Nresult)
void BMK_testXXH3_withSecret(const void* data, size_t len, const void* secret, size_t secretSize, U64 Nresult)
{
if (len>0) assert(data != NULL);
@ -787,46 +787,44 @@ static void BMK_sanityCheck(void)
BMK_testXXH64(sanityBuffer,222, 0, 0xB641AE8CB691C174ULL);
BMK_testXXH64(sanityBuffer,222, prime, 0x20CB8AB7AE10C14AULL);
BMK_testXXH3(NULL, 0, 0, 0x383739D89B1CF3E3ULL); /* empty string */
BMK_testXXH3(NULL, 0, prime64, 0xAA62E4419E745027ULL);
BMK_testXXH3(sanityBuffer, 1, 0, 0x7198D737CFE7F386ULL); /* 1 - 3 */
BMK_testXXH3(sanityBuffer, 1, prime64, 0xB70252DB7161C2BDULL); /* 1 - 3 */
BMK_testXXH3(sanityBuffer, 6, 0, 0x22CBF5F3E1F6257CULL); /* 4 - 8 */
BMK_testXXH3(sanityBuffer, 6, prime64, 0x6398631C12AB94CEULL); /* 4 - 8 */
BMK_testXXH3(sanityBuffer, 12, 0, 0xD5361CCEEBB5A0CCULL); /* 9 - 16 */
BMK_testXXH3(sanityBuffer, 12, prime64, 0xC4C125E75A808C3DULL); /* 9 - 16 */
BMK_testXXH3(sanityBuffer, 24, 0, 0x46796F3F78B20F6BULL); /* 17 - 32 */
BMK_testXXH3(sanityBuffer, 24, prime64, 0x60171A7CD0A44C10ULL); /* 17 - 32 */
BMK_testXXH3(sanityBuffer, 48, 0, 0xD8D4D3590D136E11ULL); /* 33 - 64 */
BMK_testXXH3(sanityBuffer, 48, prime64, 0x05441F2AEC2A1296ULL); /* 33 - 64 */
BMK_testXXH3(sanityBuffer, 80, 0, 0xA1DC8ADB3145B86AULL); /* 65 - 96 */
BMK_testXXH3(sanityBuffer, 80, prime64, 0xC9D55256965B7093ULL); /* 65 - 96 */
BMK_testXXH3(sanityBuffer, 112, 0, 0xE43E5717A61D3759ULL); /* 97 -128 */
BMK_testXXH3(sanityBuffer, 112, prime64, 0x5A5F89A3FECE44A5ULL); /* 97 -128 */
BMK_testXXH3(sanityBuffer, 195, 0, 0x6F747739CBAC22A5ULL); /* 129-240 */
BMK_testXXH3(sanityBuffer, 195, prime64, 0x33368E23C7F95810ULL); /* 129-240 */
BMK_testXXH3(NULL, 0, 0, 0x879CDF7824B71543ULL); /* empty string */
BMK_testXXH3(NULL, 0, prime64, 0xD51AC181E10C75E7ULL);
BMK_testXXH3(sanityBuffer, 1, 0, 0x75161D5DC4F378E9ULL); /* 1 - 3 */
BMK_testXXH3(sanityBuffer, 1, prime64, 0x9416563B6EC79D3FULL); /* 1 - 3 */
#if 0 // tests to update
BMK_testXXH3(sanityBuffer, 6, 0, 0x3DB90BED7A20AF98ULL); /* 4 - 8 */
BMK_testXXH3(sanityBuffer, 6, prime64, 0x19F27058CC2CA6A2ULL); /* 4 - 8 */
BMK_testXXH3(sanityBuffer, 12, 0, 0xC3A48A8EFD27368CULL); /* 9 - 16 */
BMK_testXXH3(sanityBuffer, 12, prime64, 0x177916BD2DC8DCB3ULL); /* 9 - 16 */
BMK_testXXH3(sanityBuffer, 24, 0, 0xA3FE70BF9D3510EBULL); /* 17 - 32 */
BMK_testXXH3(sanityBuffer, 24, prime64, 0x850E80FC35BDD690ULL); /* 17 - 32 */
BMK_testXXH3(sanityBuffer, 48, 0, 0x397DA259ECBA1F11ULL); /* 33 - 64 */
BMK_testXXH3(sanityBuffer, 48, prime64, 0xADC2CBAA44ACC616ULL); /* 33 - 64 */
BMK_testXXH3(sanityBuffer, 80, 0, 0xBCDEFBBB2C47C90AULL); /* 65 - 96 */
BMK_testXXH3(sanityBuffer, 80, prime64, 0xC6DD0CB699532E73ULL); /* 65 - 96 */
BMK_testXXH3(sanityBuffer, 195, 0, 0xCD94217EE362EC3AULL); /* 129-240 */
BMK_testXXH3(sanityBuffer, 195, prime64, 0xBA68003D370CB3D9ULL); /* 129-240 */
BMK_testXXH3(sanityBuffer, 403, 0, 0x4834389B15D981E8ULL); /* one block, last stripe is overlapping */
BMK_testXXH3(sanityBuffer, 403, prime64, 0x85CE5DFFC7B07C87ULL); /* one block, last stripe is overlapping */
BMK_testXXH3(sanityBuffer, 512, 0, 0x6A1B982631F059A8ULL); /* one block, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer, 512, prime64, 0x10086868CF0ADC99ULL); /* one block, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer,2048, 0, 0xEFEFD4449323CDD4ULL); /* 2 blocks, finishing at block boundary */
BMK_testXXH3(sanityBuffer,2048, prime64, 0x01C85E405ECA3F6EULL); /* 2 blocks, finishing at block boundary */
BMK_testXXH3(sanityBuffer,2240, 0, 0x998C0437486672C7ULL); /* 3 blocks, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer,2240, prime64, 0x4ED38056B87ABC7FULL); /* 3 blocks, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer,2243, 0, 0xA559D20581D742D3ULL); /* 3 blocks, last stripe is overlapping */
BMK_testXXH3(sanityBuffer,2243, prime64, 0x96E051AB57F21FC8ULL); /* 3 blocks, last stripe is overlapping */
BMK_testXXH3(sanityBuffer, 403, 0, 0x1B2AFF3B46C74648ULL); /* one block, last stripe is overlapping */
BMK_testXXH3(sanityBuffer, 403, prime64, 0xB654F6FFF42AD787ULL); /* one block, last stripe is overlapping */
BMK_testXXH3(sanityBuffer, 512, 0, 0x43E368661808A9E8ULL); /* one block, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer, 512, prime64, 0x3A865148E584E5B9ULL); /* one block, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer,2048, 0, 0xC7169244BBDA8BD4ULL); /* 2 blocks, finishing at block boundary */
BMK_testXXH3(sanityBuffer,2048, prime64, 0x74BF9A802BBDFBAEULL); /* 2 blocks, finishing at block boundary */
BMK_testXXH3(sanityBuffer,2240, 0, 0x30FEB637E114C0C7ULL); /* 3 blocks, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer,2240, prime64, 0xEEF78A36185EB61FULL); /* 3 blocks, finishing at stripe boundary */
BMK_testXXH3(sanityBuffer,2243, 0, 0x62C631454648A193ULL); /* 3 blocks, last stripe is overlapping */
BMK_testXXH3(sanityBuffer,2243, prime64, 0x6CF80A4BADEA4428ULL); /* 3 blocks, last stripe is overlapping */
{ const void* const secret = sanityBuffer + 7;
const size_t secretSize = XXH3_SECRET_SIZE_MIN + 11;
BMK_testXXH3_withSecret(NULL, 0, secret, secretSize, 0x367FF684075249CEULL); /* empty string */
BMK_testXXH3_withSecret(NULL, 0, secret, secretSize, 0xBAD873448BF5CC0EULL); /* empty string */
BMK_testXXH3_withSecret(sanityBuffer, 1, secret, secretSize, 0x7F69735D618DB3F0ULL); /* 1 - 3 */
BMK_testXXH3_withSecret(sanityBuffer, 6, secret, secretSize, 0xBFCC7CB1B3554DCEULL); /* 4 - 8 */
BMK_testXXH3_withSecret(sanityBuffer, 12, secret, secretSize, 0x8C50DC90AC9206FCULL); /* 9 - 16 */
BMK_testXXH3_withSecret(sanityBuffer, 24, secret, secretSize, 0x1CD2C2EE9B9A0928ULL); /* 17 - 32 */
BMK_testXXH3_withSecret(sanityBuffer, 48, secret, secretSize, 0xA785256D9D65D514ULL); /* 33 - 64 */
BMK_testXXH3_withSecret(sanityBuffer, 80, secret, secretSize, 0x6F3053360D21BBB7ULL); /* 65 - 96 */
BMK_testXXH3_withSecret(sanityBuffer, 112, secret, secretSize, 0x560E82D25684154CULL); /* 97 -128 */
BMK_testXXH3_withSecret(sanityBuffer, 195, secret, secretSize, 0xBA5BDDBC5A767B11ULL); /* 129-240 */
BMK_testXXH3_withSecret(sanityBuffer, 403, secret, secretSize, 0xFC3911BBA656DB58ULL); /* one block, last stripe is overlapping */
@ -878,18 +876,6 @@ static void BMK_sanityCheck(void)
{ XXH128_hash_t const expected = { 0xAAF9F05DA0993E3CULL, 0x01752B9AFA24C856ULL };
BMK_testXXH128(sanityBuffer, 81, prime, expected); /* 65-96 */
}
{ XXH128_hash_t const expected = { 0x01EE4637BFB66A1BULL, 0xE5CF6E0E85E92048ULL };
BMK_testXXH128(sanityBuffer, 103, 0, expected); /* 97-128 */
}
{ XXH128_hash_t const expected = { 0x784D8A364F48D048ULL, 0x9010B884DAA01151ULL };
BMK_testXXH128(sanityBuffer, 103, prime, expected); /* 97-128 */
}
{ XXH128_hash_t const expected = { 0x5FA77B9DFE8B5CAEULL, 0x2834B37CEC6A753FULL };
BMK_testXXH128(sanityBuffer, 192, 0, expected); /* 129-240 */
}
{ XXH128_hash_t const expected = { 0x75441CE0359A979AULL, 0x399E2847427B3904ULL };
BMK_testXXH128(sanityBuffer, 192, prime, expected); /* 129-240 */
}
{ XXH128_hash_t const expected = { 0xB02CC10BCFE61194ULL, 0xA27C9ABC8C06E4DDULL };
BMK_testXXH128(sanityBuffer, 222, 0, expected); /* 129-240 */
}
@ -921,11 +907,12 @@ static void BMK_sanityCheck(void)
BMK_testXXH128(sanityBuffer,2240, prime, expected); /* two blocks, ends at stripe boundary */
}
{ XXH128_hash_t const expected = { 0x970C91411533862CULL, 0x4BBD06FF7BFF0AB1ULL };
BMK_testXXH128(sanityBuffer,2237, 0, expected); /* two blocks, ends at stripe boundary */
BMK_testXXH128(sanityBuffer,2237, 0, expected); /* two blocks, last stripe is overlapping */
}
{ XXH128_hash_t const expected = { 0xD80282846D814431ULL, 0x14EBB157B84D9785ULL };
BMK_testXXH128(sanityBuffer,2237, prime, expected); /* two blocks, ends at stripe boundary */
BMK_testXXH128(sanityBuffer,2237, prime, expected); /* two blocks, last stripe is overlapping */
}
#endif
DISPLAYLEVEL(3, "\r%70s\r", ""); /* Clean display line */
DISPLAYLEVEL(3, "Sanity check -- all tests ok\n");