Bloom filter

Contents

Bloom filter

Reference

import random
import hashlib
from collections.abc import Callable
from typing import List

import numpy as np
def make_hash(hash_func: Callable[[bytes], bytes], word: str, size: int) -> int:
    m = hash_func(word.encode())
    word_bytes = m.hexdigest().encode()
    sum_ax = 1
    for i, v in enumerate(word_bytes[0: -1: 2]):
        sum_ax += v * (127 ** i)

    return sum_ax % size

def search(hash_funcs: List[Callable[[bytes], bytes]], target: str, size: int) -> bool:
    for func in hash_funcs:
        index = make_hash(func, target, size)
        if not dicts[index]:
            return False

    return True
alphabets = 'abcdefghijklmnopqrstuvwxyz'
num_elements = 10

elements = random.sample(alphabets, num_elements)
print(elements)
['t', 'n', 'q', 'e', 'b', 'v', 'k', 'z', 'h', 'i']
hash_funcs = [hashlib.md5, hashlib.sha1, hashlib.sha224, hashlib.sha512]

size = 2**15

dicts = np.zeros(size, dtype=bool)
for ele in elements:
    for func in hash_funcs:
        index = make_hash(func, ele, size)
        dicts[index] = True
search(hash_funcs, 'c', size)
False

Bloom filter

Reference

import random
import hashlib
from collections.abc import Callable
from typing import List

import numpy as np
def make_hash(hash_func: Callable[[bytes], bytes], word: str, size: int) -> int:
    m = hash_func(word.encode())
    word_bytes = m.hexdigest().encode()
    sum_ax = 1
    for i, v in enumerate(word_bytes[0: -1: 2]):
        sum_ax += v * (127 ** i)

    return sum_ax % size

def search(hash_funcs: List[Callable[[bytes], bytes]], target: str, size: int) -> bool:
    for func in hash_funcs:
        index = make_hash(func, target, size)
        if not dicts[index]:
            return False

    return True
alphabets = 'abcdefghijklmnopqrstuvwxyz'
num_elements = 10

elements = random.sample(alphabets, num_elements)
print(elements)
['t', 'n', 'q', 'e', 'b', 'v', 'k', 'z', 'h', 'i']
hash_funcs = [hashlib.md5, hashlib.sha1, hashlib.sha224, hashlib.sha512]

size = 2**15

dicts = np.zeros(size, dtype=bool)
for ele in elements:
    for func in hash_funcs:
        index = make_hash(func, ele, size)
        dicts[index] = True
search(hash_funcs, 'c', size)
False