new xxHash API (breaking changes)

This commit is contained in:
Yann Collet 2014-09-25 17:30:15 +01:00
parent d0d992cc55
commit 2d6da418a0
3 changed files with 111 additions and 151 deletions

34
bench.c
View File

@ -56,10 +56,10 @@ You can contact the author at :
#define HASH0 XXH32
// Making a wrapper to fit into the 32 bit api
unsigned int XXH64_32(const void* key, unsigned int len, unsigned int seed)
unsigned XXH64_32(const void* key, size_t len, unsigned seed)
{
unsigned long long hash = XXH64(key, len, seed);
return (unsigned int)(hash & 0xFFFFFFFF);
return (unsigned)(hash & 0xFFFFFFFF);
}
#define HASH1 XXH64_32
@ -108,7 +108,7 @@ unsigned int XXH64_32(const void* key, unsigned int len, unsigned int seed)
//**************************************
struct hashFunctionPrototype
{
unsigned int (*hashFunction)(const void*, unsigned int, unsigned int);
unsigned int (*hashFunction)(const void*, size_t, unsigned);
};
@ -420,20 +420,20 @@ static void BMK_checkResult64(U64 r1, U64 r2)
static void BMK_testSequence64(void* sentence, int len, U64 seed, U64 Nresult)
{
U64 Dresult;
void* state;
XXH64_state_t state;
int index;
Dresult = XXH64(sentence, len, seed);
BMK_checkResult64(Dresult, Nresult);
state = XXH64_init(seed);
XXH64_update(state, sentence, len);
Dresult = XXH64_digest(state);
XXH64_reset(&state, seed);
XXH64_update(&state, sentence, len);
Dresult = XXH64_digest(&state);
BMK_checkResult64(Dresult, Nresult);
state = XXH64_init(seed);
for (index=0; index<len; index++) XXH64_update(state, ((char*)sentence)+index, 1);
Dresult = XXH64_digest(state);
XXH64_reset(&state, seed);
for (index=0; index<len; index++) XXH64_update(&state, ((char*)sentence)+index, 1);
Dresult = XXH64_digest(&state);
BMK_checkResult64(Dresult, Nresult);
}
@ -441,20 +441,20 @@ static void BMK_testSequence64(void* sentence, int len, U64 seed, U64 Nresult)
static void BMK_testSequence(void* sentence, int len, U32 seed, U32 Nresult)
{
U32 Dresult;
void* state;
XXH32_state_t state;
int index;
Dresult = XXH32(sentence, len, seed);
BMK_checkResult(Dresult, Nresult);
state = XXH32_init(seed);
XXH32_update(state, sentence, len);
Dresult = XXH32_digest(state);
XXH32_reset(&state, seed);
XXH32_update(&state, sentence, len);
Dresult = XXH32_digest(&state);
BMK_checkResult(Dresult, Nresult);
state = XXH32_init(seed);
for (index=0; index<len; index++) XXH32_update(state, ((char*)sentence)+index, 1);
Dresult = XXH32_digest(state);
XXH32_reset(&state, seed);
for (index=0; index<len; index++) XXH32_update(&state, ((char*)sentence)+index, 1);
Dresult = XXH32_digest(&state);
BMK_checkResult(Dresult, Nresult);
}

118
xxhash.c
View File

@ -291,13 +291,14 @@ FORCE_INLINE U32 XXH32_endian_align(const void* input, unsigned int len, U32 see
}
U32 XXH32(const void* input, unsigned int len, U32 seed)
U32 XXH32 (const void* input, size_t len, U32 seed)
{
#if 0
#if 1
// Simple version, good for code maintenance, but unfortunately slow for small inputs
void* state = XXH32_init(seed);
XXH32_update(state, input, len);
return XXH32_digest(state);
XXH32_state_t state;
XXH32_reset(&state, seed);
XXH32_update(&state, input, len);
return XXH32_digest(&state);
#else
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
@ -398,7 +399,7 @@ FORCE_INLINE U64 XXH64_endian_align(const void* input, unsigned int len, U64 see
}
unsigned long long XXH64(const void* input, unsigned int len, unsigned long long seed)
unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
@ -418,11 +419,12 @@ unsigned long long XXH64(const void* input, unsigned int len, unsigned long long
return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
}
//****************************
// Advanced Hash Functions
//****************************
/****************************************************
* Advanced Hash Functions
****************************************************/
struct XXH_state32_t
/*** Allocation ***/
typedef struct
{
U64 total_len;
U32 seed;
@ -432,9 +434,9 @@ struct XXH_state32_t
U32 v4;
int memsize;
char memory[16];
};
} XXH_istate32_t;
struct XXH_state64_t
typedef struct
{
U64 total_len;
U64 seed;
@ -444,25 +446,29 @@ struct XXH_state64_t
U64 v4;
int memsize;
char memory[32];
};
} XXH_istate64_t;
int XXH32_sizeofState(void)
XXH32_state_t* XXH32_createState(void)
{
XXH_STATIC_ASSERT(XXH32_SIZEOFSTATE >= sizeof(struct XXH_state32_t)); // A compilation error here means XXH32_SIZEOFSTATE is not large enough
return sizeof(struct XXH_state32_t);
XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough
return (XXH32_state_t*)malloc(sizeof(XXH32_state_t));
}
/* Releases a state allocated by XXH32_createState(). free() accepts NULL, so a NULL statePtr is harmless. Always returns XXH_OK. */
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr) { free(statePtr); return XXH_OK; }
int XXH64_sizeofState(void)
XXH64_state_t* XXH64_createState(void)
{
XXH_STATIC_ASSERT(XXH64_SIZEOFSTATE >= sizeof(struct XXH_state64_t)); // A compilation error here means XXH64_SIZEOFSTATE is not large enough
return sizeof(struct XXH_state64_t);
XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not large enough
return (XXH64_state_t*)malloc(sizeof(XXH64_state_t));
}
/* Releases a state allocated by XXH64_createState(). free() accepts NULL, so a NULL statePtr is harmless. Always returns XXH_OK. */
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr) { free(statePtr); return XXH_OK; }
XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
/*** Hash feed ***/
XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed)
{
struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
XXH_istate32_t* state = (XXH_istate32_t*) state_in;
state->seed = seed;
state->v1 = seed + PRIME32_1 + PRIME32_2;
state->v2 = seed + PRIME32_2;
@ -473,9 +479,9 @@ XXH_errorcode XXH32_resetState(void* state_in, U32 seed)
return XXH_OK;
}
XXH_errorcode XXH64_resetState(void* state_in, unsigned long long seed)
XXH_errorcode XXH64_reset(XXH64_state_t* state_in, unsigned long long seed)
{
struct XXH_state64_t * state = (struct XXH_state64_t *) state_in;
XXH_istate64_t* state = (XXH_istate64_t*) state_in;
state->seed = seed;
state->v1 = seed + PRIME64_1 + PRIME64_2;
state->v2 = seed + PRIME64_2;
@ -487,24 +493,9 @@ XXH_errorcode XXH64_resetState(void* state_in, unsigned long long seed)
}
void* XXH32_init (U32 seed)
FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
void* state = XXH_malloc (sizeof(struct XXH_state32_t));
if (state != NULL) XXH32_resetState(state, seed);
return state;
}
void* XXH64_init (unsigned long long seed)
{
void* state = XXH_malloc (sizeof(struct XXH_state64_t));
if (state != NULL) XXH64_resetState(state, seed);
return state;
}
FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
{
struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
XXH_istate32_t* state = (XXH_istate32_t *) state_in;
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
@ -566,7 +557,7 @@ FORCE_INLINE XXH_errorcode XXH32_update_endian (void* state_in, const void* inpu
return XXH_OK;
}
XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len)
XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
@ -578,9 +569,9 @@ XXH_errorcode XXH32_update (void* state_in, const void* input, unsigned int len)
FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess endian)
FORCE_INLINE U32 XXH32_digest_endian (XXH32_state_t* state_in, XXH_endianess endian)
{
struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
XXH_istate32_t* state = (XXH_istate32_t*) state_in;
const BYTE * p = (const BYTE*)state->memory;
BYTE* bEnd = (BYTE*)state->memory + state->memsize;
U32 h32;
@ -620,30 +611,20 @@ FORCE_INLINE U32 XXH32_intermediateDigest_endian (void* state_in, XXH_endianess
}
U32 XXH32_intermediateDigest (void* state_in)
U32 XXH32_digest (XXH32_state_t* state_in)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH32_intermediateDigest_endian(state_in, XXH_littleEndian);
return XXH32_digest_endian(state_in, XXH_littleEndian);
else
return XXH32_intermediateDigest_endian(state_in, XXH_bigEndian);
return XXH32_digest_endian(state_in, XXH_bigEndian);
}
U32 XXH32_digest (void* state_in)
FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
{
U32 h32 = XXH32_intermediateDigest(state_in);
XXH_free(state_in);
return h32;
}
FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* input, int len, XXH_endianess endian)
{
struct XXH_state64_t * state = (struct XXH_state64_t *) state_in;
XXH_istate64_t * state = (XXH_istate64_t *) state_in;
const BYTE* p = (const BYTE*)input;
const BYTE* const bEnd = p + len;
@ -705,7 +686,7 @@ FORCE_INLINE XXH_errorcode XXH64_update_endian (void* state_in, const void* inpu
return XXH_OK;
}
XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len)
XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
@ -717,9 +698,9 @@ XXH_errorcode XXH64_update (void* state_in, const void* input, unsigned int len)
FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess endian)
FORCE_INLINE U64 XXH64_digest_endian (XXH64_state_t* state_in, XXH_endianess endian)
{
struct XXH_state64_t * state = (struct XXH_state64_t *) state_in;
XXH_istate64_t * state = (XXH_istate64_t *) state_in;
const BYTE * p = (const BYTE*)state->memory;
BYTE* bEnd = (BYTE*)state->memory + state->memsize;
U64 h64;
@ -784,23 +765,14 @@ FORCE_INLINE U64 XXH64_intermediateDigest_endian (void* state_in, XXH_endianess
}
unsigned long long XXH64_intermediateDigest (void* state_in)
unsigned long long XXH64_digest (XXH64_state_t* state_in)
{
XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
return XXH64_intermediateDigest_endian(state_in, XXH_littleEndian);
return XXH64_digest_endian(state_in, XXH_littleEndian);
else
return XXH64_intermediateDigest_endian(state_in, XXH_bigEndian);
return XXH64_digest_endian(state_in, XXH_bigEndian);
}
unsigned long long XXH64_digest (void* state_in)
{
U64 h64 = XXH64_intermediateDigest(state_in);
XXH_free(state_in);
return h64;
}

110
xxhash.h
View File

@ -64,6 +64,12 @@ extern "C" {
#endif
/*****************************
Includes
*****************************/
#include <stddef.h> /* size_t */
/*****************************
Type
*****************************/
@ -75,18 +81,16 @@ typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
Simple Hash Functions
*****************************/
unsigned int XXH32 (const void* input, unsigned int len, unsigned int seed);
unsigned long long XXH64 (const void* input, unsigned int len, unsigned long long seed);
unsigned int XXH32 (const void* input, size_t length, unsigned seed);
unsigned long long XXH64 (const void* input, size_t length, unsigned long long seed);
/*
XXH32() :
Calculate the 32-bits hash of sequence of length "len" stored at memory address "input".
The memory between input & input+len must be valid (allocated and read-accessible).
Calculate the 32-bits hash of a sequence of "length" bytes stored at memory address "input".
The memory between input & input+length must be valid (allocated and read-accessible).
"seed" can be used to alter the result predictably.
This function successfully passes all SMHasher tests.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
Note that "len" is type "int", which means it is limited to 2^31-1.
If your data is larger, use the advanced functions below.
XXH64() :
Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
*/
@ -96,70 +100,54 @@ XXH64() :
/*****************************
Advanced Hash Functions
*****************************/
void* XXH32_init (unsigned int seed);
XXH_errorcode XXH32_update (void* state, const void* input, unsigned int len);
unsigned int XXH32_digest (void* state);
void* XXH64_init (unsigned long long seed);
XXH_errorcode XXH64_update (void* state, const void* input, unsigned int len);
unsigned long long XXH64_digest (void* state);
typedef struct { long long ll[ 6]; } XXH32_state_t;
typedef struct { long long ll[11]; } XXH64_state_t;
/*
These functions calculate the xxhash of an input provided in several small packets,
These structures allow static allocation of XXH states.
States must then be initialized using XXHnn_reset() before first use.
If you prefer dynamic allocation, please refer to functions below.
*/
XXH32_state_t* XXH32_createState(void);
XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
XXH64_state_t* XXH64_createState(void);
XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
/*
These functions create and release memory for XXH state.
States must then be initialized using XXHnn_reset() before first use.
*/
XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed);
XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
unsigned int XXH32_digest (XXH32_state_t* statePtr);
XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
unsigned long long XXH64_digest (XXH64_state_t* statePtr);
/*
These functions calculate the xxHash of an input provided in multiple smaller packets,
as opposed to an input provided as a single block.
It must be started with :
void* XXHnn_init()
The function returns a pointer which holds the state of calculation.
If the pointer is NULL, allocation has failed, so no state can be tracked.
XXH state space must first be allocated, using either the static or the dynamic method provided above.
The state pointer must be provided as "void* state" parameter for XXHnn_update().
XXHnn_update() can be called as many times as necessary.
The user must provide a valid (allocated) input.
Start a new hash by initializing state with a seed, using XXHnn_reset().
Then, feed the hash state by calling XXHnn_update() as many times as necessary.
Obviously, input must be valid, meaning allocated and read accessible.
The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
Note that "len" is type "int", which means it is limited to 2^31-1.
If your data is larger, it is recommended to chunk your data into blocks
of size for example 2^30 (1GB) to avoid any "int" overflow issue.
Finally, you can end the calculation anytime, by using XXHnn_digest().
Finally, you can produce a hash anytime, by using XXHnn_digest().
This function returns the final nn-bits hash.
You must provide the same "void* state" parameter created by XXHnn_init().
Memory will be freed by XXHnn_digest().
*/
You can nonetheless continue feeding the hash state with more input,
and therefore get new hashes, by calling XXHnn_digest() again.
int XXH32_sizeofState(void);
XXH_errorcode XXH32_resetState(void* state, unsigned int seed);
#define XXH32_SIZEOFSTATE 48
typedef struct { long long ll[(XXH32_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH32_stateSpace_t;
int XXH64_sizeofState(void);
XXH_errorcode XXH64_resetState(void* state, unsigned long long seed);
#define XXH64_SIZEOFSTATE 88
typedef struct { long long ll[(XXH64_SIZEOFSTATE+(sizeof(long long)-1))/sizeof(long long)]; } XXH64_stateSpace_t;
/*
These functions allow user application to make its own allocation for state.
XXHnn_sizeofState() is used to know how much space must be allocated for the xxHash nn-bits state.
Note that the state must be aligned to access 'long long' fields. Memory must be allocated and referenced by a pointer.
This pointer must then be provided as 'state' into XXHnn_resetState(), which initializes the state.
For static allocation purposes (such as allocation on stack, or freestanding systems without malloc()),
use the structure XXHnn_stateSpace_t, which will ensure that memory space is large enough and correctly aligned to access 'long long' fields.
*/
unsigned int XXH32_intermediateDigest (void* state);
unsigned long long XXH64_intermediateDigest (void* state);
/*
These functions do the same as XXHnn_digest(), generating a nn-bit hash,
but preserve memory context.
This way, it becomes possible to generate intermediate hashes, and then continue feeding data with XXHnn_update().
To free memory context, use XXHnn_digest(), or free().
When you are done, don't forget to free the XXH state space, typically using XXHnn_freeState().
*/