xed/pysrc/hashmul.py

211 lines
5.9 KiB
Python
Raw Normal View History

#!/usr/bin/env python
# -*- python -*-
#BEGIN_LEGAL
#
#Copyright (c) 2017 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#END_LEGAL
from __future__ import print_function
import sys
import os
import math
import xedhash
class hashmul_t(xedhash.hash_fun_interface_t):
"""Implement multiplicative hashing."""
def __init__(self, table_size):
# golden ratio phi is (1+sqrt(5))/2. From Knuth, volume 3, page 516
# 1/phi = (sqrt(5)-1)/2 (after some arithmetic)
# We are using 1/phi * 2**n
# where n is the number of bits in the data type (32)
self.golden_ratio_recip2to32 = 2654435769
self.table_size = table_size
# pow2 is True if the table is a power of 2.
# ilog2_table_size is only valid if pow2 is True
self.pow2, self.ilog2_table_size = self.power_of_2()
def kind(self):
return "mult"
def power_of_2(self):
ilog2_table_size = int(math.log(self.table_size,2))
if pow(2,ilog2_table_size) == self.table_size:
return (True, ilog2_table_size)
return (False, -1)
def get_table_size(self):
return self.table_size
def __str__(self):
return "h(x) = hashmul({})".format(self.table_size)
def apply(self, k):
"""Apply the hash function to the key k"""
#sys.stderr.write("Apply {} --> ".format(k))
q = self.golden_ratio_recip2to32 * k
fraction = q & ((1<<32)-1)
r = fraction * self.table_size
v = r >> 32
#sys.stderr.write(" {}\n".format(v))
return v
def apply_pow2(self, k):
"""Apply the hash function to the key k, for power of 2 table sizes"""
q = self.golden_ratio_recip2to32 * k
fraction = q & ((1<<32)-1)
v = fraction >> (32-self.ilog2_table_size)
return v
def is_perfect(self, key_list):
values = set()
for k in key_list:
#sys.stderr.write("Checking {}\n".format(k))
v = self.apply(k)
if v in values:
# collision - not perfect
return False
values.add(v)
# no collisions in the output of the hash: perfect
return True
def need_hash_index_validation(self):
"""Need to validate that we landed on live bucket"""
return True
def add_key_validation(self, strings_dict):
key_str = strings_dict['key_str']
hentry_str ='%s[%s]' % (strings_dict['table_name'],
strings_dict['hidx_str'])
return 'if(%s.key == %s)' % (hentry_str, key_str)
def emit_cvar_decl(self):
if self.pow2:
return "xed_union64_t t"
else:
return "xed_union64_t t, u"
def emit_cexpr(self, key_str="key"):
"""Emit a C expression for the hash function given a C variable
key_str."""
if self.pow2:
# power of 2 table size can replace the 2nd multiply with a shift
c_hash_expr = """(t.u64 = {0} * {1}, t.s.lo32 >> (32-{2}))""".format(
str(self.golden_ratio_recip2to32),
key_str,
self.ilog2_table_size)
else:
# the ULL cast on the constant is important to get 64b math.
c_hash_expr = """(t.u64 = {0} * {1}, u.u64 = t.s.lo32 * {2}ULL, u.s.hi32)""".format(
str(self.golden_ratio_recip2to32),
key_str,
str(self.table_size))
return c_hash_expr
def find_perfect(keylist):
n = len(keylist)
for m in range(n,2*n):
f = hashmul_t(n)
if f.is_perfect(keylist):
return f
return None
def test1():
f = hashmul_t(128)
for k in range(0,128):
v = f.apply(k)
print("{} -> {}".format(k,v))
if f.is_perfect(range(0,128)):
print("Hash function is perfect")
else:
print("Hash function has collisions")
print(f.emit_cexpr())
return 0
def test2():
f = hashmul_t(9)
inputs = [225,2273,737,2785,241,2289,753,2801]
for k in inputs:
v = f.apply(k)
print("{} -> {}".format(k,v))
if f.is_perfect(inputs):
print("Hash function is perfect")
else:
print("Hash function has collisions")
print(f.emit_cexpr())
return 0
def test3():
f = hashmul_t(16)
inputs = [225,2273,737,2785,241,2289,753,2801]
for k in inputs:
v1 = f.apply(k)
v2 = f.apply_pow2(k)
if v1 != v2:
print("ERROR {} -> {} {}".format(k,v1,v2))
else:
print("OK {} -> {} {}".format(k,v1,v2))
if f.is_perfect(inputs):
print("Hash function is perfect")
else:
print("Hash function has collisions")
print(f.emit_cexpr())
return 0
def test4():
f = hashmul_t(1)
inputs = [68002]
for k in inputs:
v1 = f.apply(k)
v2 = f.apply_pow2(k)
if v1 != v2:
print("ERROR {} -> {} {}".format(k,v1,v2))
else:
print("OK {} -> {} {}".format(k,v1,v2))
if f.is_perfect(inputs):
print("Hash function is perfect")
else:
print("Hash function has collisions")
print(f.emit_cexpr())
return 0
def test():
for f in [test1, test2, test3, test4]:
r = f()
if r:
print("FAIL: {}".format(f.__name__))
else:
print("PASS: {}".format(f.__name__))
if __name__ == "__main__":
r = test()
sys.exit(r)