Contents
Bloom filter¶
Reference¶
import random
import hashlib
from collections.abc import Callable
from typing import List
import numpy as np
def make_hash(hash_func: Callable[[bytes], bytes], word: str, size: int) -> int:
m = hash_func(word.encode())
word_bytes = m.hexdigest().encode()
sum_ax = 1
for i, v in enumerate(word_bytes[0: -1: 2]):
sum_ax += v * (127 ** i)
return sum_ax % size
def search(hash_funcs: List[Callable[[bytes], bytes]], target: str, size: int) -> bool:
for func in hash_funcs:
index = make_hash(func, target, size)
if not dicts[index]:
return False
return True
alphabets = 'abcdefghijklmnopqrstuvwxyz'
num_elements = 10
elements = random.sample(alphabets, num_elements)
print(elements)
['t', 'n', 'q', 'e', 'b', 'v', 'k', 'z', 'h', 'i']
hash_funcs = [hashlib.md5, hashlib.sha1, hashlib.sha224, hashlib.sha512]
size = 2**15
dicts = np.zeros(size, dtype=bool)
for ele in elements:
for func in hash_funcs:
index = make_hash(func, ele, size)
dicts[index] = True
search(hash_funcs, 'c', size)
False