"""
Bloom Filter — Space-efficient probabilistic set membership.

Trade certainty for memory: a Bloom filter can tell you "definitely not
in the set" or "probably in the set." False positives are possible;
false negatives are not. At 1% FP rate, uses ~9.6 bits per element
regardless of element size.

Uses: spell checkers, network routers, database query optimizers,
web crawlers (skip visited URLs), CDN cache summaries.
"""

import math
import mmh3
from bitarray import bitarray


class BloomFilter:
    def __init__(self, expected_items: int, fp_rate: float = 0.01):
        self.size = self._optimal_size(expected_items, fp_rate)
        self.hash_count = self._optimal_hashes(self.size, expected_items)
        self.bits = bitarray(self.size)
        self.bits.setall(0)
        self.count = 0

    def add(self, item: str) -> None:
        for seed in range(self.hash_count):
            idx = mmh3.hash(item, seed) % self.size
            self.bits[idx] = 1
        self.count += 1

    def __contains__(self, item: str) -> bool:
        return all(
            self.bits[mmh3.hash(item, seed) % self.size]
            for seed in range(self.hash_count)
        )

    @property
    def estimated_fp_rate(self) -> float:
        """Current false positive probability given items inserted."""
        filled = self.bits.count(1) / self.size
        return filled ** self.hash_count

    @staticmethod
    def _optimal_size(n: int, p: float) -> int:
        return int(-n * math.log(p) / (math.log(2) ** 2))

    @staticmethod
    def _optimal_hashes(m: int, n: int) -> int:
        return max(1, int((m / n) * math.log(2)))


# Quick demo
if __name__ == "__main__":
    bf = BloomFilter(expected_items=100_000, fp_rate=0.001)

    words = ["serendipity", "ephemeral", "luminous", "petrichor", "vellichor"]
    for w in words:
        bf.add(w)

    print(f"Filter: {bf.size:,} bits ({bf.size // 8:,} bytes), {bf.hash_count} hashes")
    print(f"'serendipity' in filter: {'serendipity' in bf}")   # True
    print(f"'nostalgia' in filter:   {'nostalgia' in bf}")      # False
    print(f"Estimated FP rate: {bf.estimated_fp_rate:.6%}")
