mirror of
https://github.com/RPCSX/xed.git
synced 2026-01-31 01:05:17 +01:00
154 lines
5.5 KiB
Python
154 lines
5.5 KiB
Python
#!/usr/bin/env python
|
|
# -*- python -*-
|
|
#BEGIN_LEGAL
|
|
#
|
|
#Copyright (c) 2016 Intel Corporation
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
#END_LEGAL
|
|
|
|
import sys
|
|
import os
|
|
|
|
import tup2int
|
|
import xedhash
|
|
#paper of Fredman Komlos and Szemeredi (1984) :
|
|
#following hash function family is universal:
|
|
# x -> (kx mod p) mod s
|
|
# s ~ n^2
|
|
# p > s
|
|
# k is in 1,3,..., 2k+1
|
|
#Implementation of the hash_fun_interface_t
|
|
class hash_fun_fks_t(xedhash.hash_fun_interface_t):
|
|
def __init__(self, k, p, m):
|
|
self.k = k
|
|
self.p = p
|
|
if p < m:
|
|
self.m = p
|
|
else:
|
|
self.m = m
|
|
def kind(self):
|
|
return "fks"
|
|
|
|
def get_table_size(self):
|
|
return self.m
|
|
|
|
def tuple2int(self, tuple_val, cnames, op_widths_dict):#FIXME NOT USED
|
|
return tup2int.tuple2int(tuple_val, cnames, op_widths_dict)
|
|
|
|
def apply(self, x):
|
|
return ((self.k*x) % self.p) % self.m
|
|
|
|
def emit_cexpr(self, key_str='key'):
|
|
if self.m == 1:
|
|
return '(0)'
|
|
if self.p == self.m:
|
|
#when m==p it is unnecessary to do "mod m"
|
|
expr = '(%d*%s %% %d)' % (self.k, key_str, self.p)
|
|
else:
|
|
expr = '((%d*%s %% %d) %% %d)' % (self.k, key_str, self.p, self.m)
|
|
return expr
|
|
|
|
def need_hash_index_validation(self):
|
|
return True
|
|
|
|
def add_key_validation(self, strings_dict):
|
|
key_str = strings_dict['key_str']
|
|
hentry_str ='%s[%s]' % (strings_dict['table_name'],
|
|
strings_dict['hidx_str'])
|
|
|
|
return 'if(%s.key == %s)' % (hentry_str, key_str)
|
|
def __str__(self):
|
|
lines = []
|
|
lines.append('x = Sigma(Ti << bit_shift)')
|
|
lines.append('FKS(x) = (%dx mod %d) mod %d' % (self.k, self.p, self.m))
|
|
return '\n'.join(lines)
|
|
|
|
|
|
_primes = [
|
|
2, 3, 5, 7, 11, 13, 17, 19, 23, 29,
|
|
31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
|
|
73, 79, 83, 89, 97, 101, 103, 107, 109, 113,
|
|
127, 131, 137, 139, 149, 151, 157, 163, 167, 173,
|
|
179, 181, 191, 193, 197, 199, 211, 223, 227, 229,
|
|
233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
|
|
283, 293, 307, 311, 313, 317, 331, 337, 347, 349,
|
|
353, 359, 367, 373, 379, 383, 389, 397, 401, 409,
|
|
419, 421, 431, 433, 439, 443, 449, 457, 461, 463,
|
|
467, 479, 487, 491, 499, 503, 509, 521, 523, 541,
|
|
547, 557, 563, 569, 571, 577, 587, 593, 599, 601,
|
|
607, 613, 617, 619, 631, 641, 643, 647, 653, 659,
|
|
661, 673, 677, 683, 691, 701, 709, 719, 727, 733,
|
|
739, 743, 751, 757, 761, 769, 773, 787, 797, 809,
|
|
811, 821, 823, 827, 829, 839, 853, 857, 859, 863,
|
|
877, 881, 883, 887, 907, 911, 919, 929, 937, 941]
|
|
|
|
#FIXME: BTW the tuple->int computation itself is a good hash function.
|
|
#Moreover, when m is a prime, the functions h(t) = (Sigma(AiTi)) mod m
|
|
#where Ti and Ai are arbitrary values in Zm Galois field
|
|
#are a universal hash function family!
|
|
#Hence our tuple2int function might be a good hash function itself, and
|
|
#if we parametrize it with Ai, we can get perfect hash functions!
|
|
#This can be faster than FKS because we do the tuple2int computation
|
|
#for FKS anyway before applying the FKS hash function.
|
|
#TODO: check if this approach is better than FKS.
|
|
_max_k = 32
|
|
|
|
_l1_bucket_max = 8 # FIXME: make this a parameter. Also in ild_phash.py
|
|
|
|
|
|
def _get_l1_mlist(n):
|
|
"""n is the number of keys in the hash function. Guessing a good size
|
|
for the hash table. Try squaring number of keys but that is too
|
|
much. if we use n, we get minimal perfect hash functions """
|
|
global _l1_bucket_max
|
|
if n == 1:
|
|
mlist = [1]
|
|
elif n <= _l1_bucket_max:
|
|
mlist = [n, 2*n, n*n]
|
|
else:
|
|
mlist = [n, 2*n] #just to try
|
|
return mlist
|
|
|
|
def find_fks_perfect(keylist):
|
|
"""Return a perfect hash function for a given key list. Or None if no
|
|
perfect hash function could be found."""
|
|
global _max_k
|
|
mlist = _get_l1_mlist(len(keylist))
|
|
for m in mlist: # of buckets
|
|
for p in _primes:
|
|
for k in range(3, _max_k):
|
|
hash_f = hash_fun_fks_t(k,p,m)
|
|
if xedhash.is_perfect(keylist, hash_f):
|
|
return hash_f
|
|
del hash_f
|
|
return None
|
|
|
|
|
|
|
|
def find_fks_well_distributed(keylist):
|
|
"""Return a hash well-distributed function (not necessarily perfect!)
|
|
for a given key list"""
|
|
global _max_k
|
|
global _l1_bucket_max
|
|
mlist = _get_l1_mlist(len(keylist)) # number of buckets
|
|
for m in mlist:
|
|
for p in _primes:
|
|
for k in range(3, _max_k):
|
|
hash_f = hash_fun_fks_t(k,p,m)
|
|
if xedhash.is_well_distributed(keylist, hash_f, _l1_bucket_max):
|
|
return hash_f
|
|
del hash_f
|
|
return None
|