mirror of
https://github.com/FEX-Emu/xxHash.git
synced 2024-11-23 14:39:40 +00:00
simplify integration of other hash algorithms
This commit is contained in:
parent
dadcbc4a0f
commit
e618ec4dcc
@ -34,13 +34,17 @@ LDFLAGS += -pthread
|
||||
LDFLAGS += -maes -mavx2
|
||||
TESTHASHES = 110000000
|
||||
|
||||
HASH_SRC := $(sort $(wildcard allcodecs/*.c allcodecs/*.cc))
|
||||
HASH_OBJ := $(patsubst %.c,%.o,$(HASH_SRC))
|
||||
|
||||
|
||||
.PHONY: default
|
||||
default: release
|
||||
|
||||
.PHONY: all
|
||||
all: release
|
||||
|
||||
collisionsTest: main.o pool.o threading.o sort.o
|
||||
collisionsTest: main.o pool.o threading.o sort.o $(HASH_OBJ)
|
||||
$(CXX) $(CPPFLAGS) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
main.o: hashes.h xxh3.h xxhash.h
|
||||
@ -68,5 +72,5 @@ test: debug
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
$(RM) *.o
|
||||
$(RM) *.o allcodecs/*.o
|
||||
$(RM) collisionsTest
|
||||
|
@ -6,18 +6,22 @@ and comparing the result to an "ideal" target.
|
||||
|
||||
The test requires a very large amount of memory.
|
||||
By default, it will generate 24 billion of 64-bit hashes,
|
||||
requiring 192 GB of RAM for their storage.
|
||||
The number of hashes can be modified using command `--nbh=`,
|
||||
but beware that requiring too few hashes will not provide meaningful information on the algorithm's collision performance.
|
||||
requiring __192 GB of RAM__ for their storage.
|
||||
The number of hashes can be modified using command `--nbh=`.
|
||||
be aware that testing the collision ratio of 64-bit hashes
|
||||
requires a very large amount of hashes (several billions) for meaningful measurements.
|
||||
|
||||
To reduce RAM usage, an optional filter can be requested, with `--filter`.
|
||||
It reduces the nb of candidates to analyze, hence associated RAM budget.
|
||||
Be aware that the filter also requires RAM
|
||||
Note that the filter itself requires a lot of RAM
|
||||
(32 GB by default, can be modified using `--filterlog=`,
|
||||
a too small filter will not be efficient, aim at ~2 bytes per hash),
|
||||
and that managing the filter costs a significant CPU budget.
|
||||
and reading and writing into filter cost a significant CPU budget,
|
||||
so this method is slower.
|
||||
It also doesn't allow advanced analysis of partial bitfields,
|
||||
since most hashes will be discarded and not stored.
|
||||
|
||||
The RAM budget will be completed by a list of candidates,
|
||||
When using the filter, the RAM budget consists of the filter and a list of candidates,
|
||||
which will be a fraction of original hash list.
|
||||
Using default settings (24 billion hashes, 32 GB filter),
|
||||
the number of potential candidates should be reduced to less than 2 billions,
|
||||
@ -28,6 +32,45 @@ but storage must allocate an upper bound.
|
||||
|
||||
For the default test, the expected "optimal" collision rate for a 64-bit hash function is ~18 collisions.
|
||||
|
||||
#### How to integrate any hash in the tester
|
||||
|
||||
The build script is expecting to compile files in `./allcodecs`.
|
||||
Put the source code here.
|
||||
This also works if the hash is a single `*.h` file.
|
||||
|
||||
The glue happens in `hashes.h`.
|
||||
In this file, there are 2 sections :
|
||||
- Add the required `#include "header.h"`, and create a wrapper,
|
||||
to respect the format expected by the function pointer.
|
||||
- Add the wrapper, along with the name and an indication of the output width,
|
||||
to the table, at the end of `hashes.h`
|
||||
|
||||
Build with `make`. Locate your new hash with `./collisionsTest -h`,
|
||||
it should be listed.
|
||||
|
||||
|
||||
#### Some advice on how to set up a collisions test
|
||||
|
||||
The test is primarily driven by the amount of RAM available.
|
||||
Here's a method to decide the size of the test.
|
||||
|
||||
Presuming that RAM budget is not plentiful, for this example 32 GB,
|
||||
the `--filter` mode is actually compulsory to measure anything meaningful.
|
||||
Let's plan 50% of memory for the filter, that's 16 GB.
|
||||
This will be good enough to filter about 10% less hashes than this size.
|
||||
Let's round down to 14 G.
|
||||
|
||||
By requesting 14G, the expectation is that the program will automatically
|
||||
size the filter to 16 GB, and expect to store ~1G candidates,
|
||||
leaving enough room to breathe for the system.
|
||||
|
||||
The command line becomes :
|
||||
```
|
||||
./collisionsTest --nbh=14G --filter NameOfHash
|
||||
```
|
||||
|
||||
#### Examples :
|
||||
|
||||
Here are a few results produced with this tester :
|
||||
|
||||
| Name | nb Collisions | Notes |
|
||||
|
38
tests/collisions/allcodecs/dummy.c
Normal file
38
tests/collisions/allcodecs/dummy.c
Normal file
@ -0,0 +1,38 @@
|
||||
/* dummy.c,
|
||||
* a fake hash algorithm, just to test integration capabilities.
|
||||
* Part of xxHash project
|
||||
* Copyright (C) 2012-present, Yann Collet
|
||||
*
|
||||
* GPL v2 License
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - xxHash homepage : http://www.xxhash.com
|
||||
* - xxHash source repository : https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
|
||||
#include <dummy.h>
|
||||
|
||||
/*
 * badsum32():
 * A deliberately weak checksum, present only to exercise the
 * hash-integration plumbing of the collisions tester.
 * Adds every input byte to an accumulator initialized with `seed`
 * and returns the running total.
 */
unsigned badsum32(const void* input, size_t len, unsigned seed)
{
    const unsigned char* const bytes = input;
    unsigned total = seed;

    for (size_t i = 0; i < len; i++) {
        total += bytes[i];
    }
    return total;
}
|
44
tests/collisions/allcodecs/dummy.h
Normal file
44
tests/collisions/allcodecs/dummy.h
Normal file
@ -0,0 +1,44 @@
|
||||
/* dummy.h,
|
||||
* a fake hash algorithm, just to test integration capabilities.
|
||||
* Part of xxHash project
|
||||
* Copyright (C) 2012-present, Yann Collet
|
||||
*
|
||||
* GPL v2 License
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along
|
||||
* with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* You can contact the author at :
|
||||
* - xxHash homepage : http://www.xxhash.com
|
||||
* - xxHash source repository : https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
#ifndef DUMMY_H_987987
#define DUMMY_H_987987

#if defined (__cplusplus)
extern "C" {
#endif

#include <stddef.h>   /* size_t */

/*
 * badsum32():
 * Trivially weak byte-sum "hash", provided only to validate the
 * integration of external hash algorithms into the tester.
 * Returns the sum of all input bytes added to `seed`.
 */
unsigned badsum32(const void* input, size_t len, unsigned seed);

#if defined (__cplusplus)
}
#endif

#endif /* DUMMY_H_987987 */
|
@ -39,6 +39,12 @@ typedef union {
|
||||
XXH128_hash_t h128;
|
||||
} UniHash;
|
||||
|
||||
UniHash uniHash32(uint64_t v32)
|
||||
{ UniHash unih;
|
||||
unih.h64 = v32;
|
||||
return unih;
|
||||
}
|
||||
|
||||
UniHash uniHash64(uint64_t v64)
|
||||
{ UniHash unih;
|
||||
unih.h64 = v64;
|
||||
@ -81,7 +87,16 @@ UniHash XXH64_wrapper (const void* data, size_t size)
|
||||
|
||||
/*
 * XXH32_wrapper():
 * Adapts XXH32 (seed fixed at 0) to the generic UniHash-returning
 * function-pointer signature used by the hash function table.
 */
UniHash XXH32_wrapper (const void* data, size_t size)
{
    return uniHash32(XXH32(data, size, 0));
}
|
||||
|
||||
/* === Dummy integration example === */
|
||||
|
||||
#include "dummy.h"
|
||||
|
||||
/*
 * badsum32_wrapper():
 * Adapts the dummy badsum32 checksum (seed fixed at 0) to the generic
 * UniHash-returning signature expected by the hash function table.
 */
UniHash badsum32_wrapper (const void* data, size_t size)
{
    unsigned const sum = badsum32(data, size, 0);
    return uniHash32(sum);
}
|
||||
|
||||
|
||||
@ -96,7 +111,7 @@ typedef struct {
|
||||
int bits;
|
||||
} hashDescription;
|
||||
|
||||
#define HASH_FN_TOTAL 6
|
||||
#define HASH_FN_TOTAL 7
|
||||
|
||||
hashDescription hashfnTable[HASH_FN_TOTAL] = {
|
||||
{ "xxh3" , XXH3_wrapper, 64 },
|
||||
@ -105,6 +120,7 @@ hashDescription hashfnTable[HASH_FN_TOTAL] = {
|
||||
{ "xxh128l", XXH128l_wrapper, 64 },
|
||||
{ "xxh128h", XXH128h_wrapper, 64 },
|
||||
{ "xxh32" , XXH32_wrapper, 32 },
|
||||
{ "badsum32",badsum32_wrapper, 32 },
|
||||
};
|
||||
|
||||
#endif /* HASHES_H_1235465 */
|
||||
|
Loading…
Reference in New Issue
Block a user