{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveAnyClass #-}

{-|
Module      : Data.CuckooFilter
Copyright   : (c) Chris Coffey, 2018
License     : MIT
Maintainer  : chris@foldl.io
Stability   : experimental

Cuckoo filters are an alternative data structure to Bloom filters. They use a different
approach to hashing items into the filter, which provides different behavior under load.

Inserting an item in to a Bloom filter always succeeds, although as the load factor on
the filter increases the false positive probability trends towards /%100/. Cuckoo filters
on the other hand hold the false positive probability constant under load, but will
begin to fail inserts.

Cuckoo filters also support deletion natively, which allows reclaiming some used space
from the filter to ward off insertion failures.

For more details see: Fan, . Cuckoo Filter: Practically Better Than Bloom. Retrieved August 22, 2016, from https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf
-}

module Data.CuckooFilter
    (
    -- * The Cuckoo Filter
    Size,
    makeSize,
    Filter,
    empty,

    -- * Working with a Cuckoo Filter
    insert,
    member,
    delete
    ) where

import Data.Hashable (Hashable)
import qualified Data.IntMap.Strict as IM
import Data.Maybe (fromMaybe)

import Data.CuckooFilter.Internal


-- | In exchange for the stable false-positive probability, insertion into a cuckoo filter
-- may fail as the load factor increases.
--
-- Amoritized /O(1)/
--
-- Note, because of how cuckoo hashing works, inserts will fail when there's a set of items /s/
-- that hash to the same fingerprint and share either IndexA or IndexB with probability /(2/numBuckets * 1/256)^ (|s|-1)/.
-- Alternatively, inserting the same item /2b+1/ times will trigger the failure as well.
--
insert :: (Hashable a) =>
    Filter a -- ^ Current filter state
    -> a -- ^ Item to hash and store in the filter
    -> Maybe (Filter a)
insert cfilt@(F {numBuckets}) val = let
    idxA = primaryIndex val numBuckets
    fp = makeFingerprint val
    bkts = buckets cfilt
    bucketA = fromMaybe emptyBucket $ toIndex numBuckets idxA `IM.lookup` bkts
    in case insertBucket fp bucketA of
        Just bucketA' -> Just $ cfilt {buckets = IM.insert (toIndex numBuckets idxA) bucketA' bkts}
        Nothing -> let
            idxB = secondaryIndex fp numBuckets idxA
            in bumpHash maxNumKicks cfilt idxB fp
    where
        (Size s) = size cfilt
        maxNumKicks = floor $ 0.1 * fromIntegral s

        -- The details of this algorithm can be found in https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf
        -- If the kick count is exhausted, the insert fails
        bumpHash 0 _ _ _ = Nothing
        bumpHash remaingKicks cfilt' idxB fp = let
            bkts = buckets cfilt'
            bucketB = fromMaybe emptyBucket $ toIndex numBuckets idxB `IM.lookup` bkts
            in case insertBucket fp bucketB of
                Just bb' -> Just $ cfilt' {buckets = IM.insert (toIndex numBuckets idxB) bb' bkts }
                Nothing -> let
                    (bumpedFP, bucketB') = replaceInBucket fp isBucketMinimum bucketB
                    nextStepFilter = cfilt' {buckets = IM.insert (toIndex numBuckets idxB) bucketB' bkts }
                    kickedIndex = kickedSecondaryIndex bumpedFP numBuckets idxB
                    in bumpHash (remaingKicks - 1) nextStepFilter kickedIndex bumpedFP

        isBucketMinimum _ bkt = let
            a = getCell bkt 0
            b = getCell bkt 1
            c = getCell bkt 2
            d = getCell bkt 3
            m = min a . min b $ min c d
            in (a == m, b == m, c == m, d == m)

-- | Checks whether a given item is within the filter.
--
-- /O(1)/
member :: (Hashable a) =>
    a -- ^ Check if this element is in the filter
    -> Filter a -- ^ The filter
    -> Bool
member a cFilter =
    inBucket fp bA || inBucket fp bB
    where
        bktCount = numBuckets cFilter
        fp = makeFingerprint a
        idxA = primaryIndex a bktCount
        idxB = secondaryIndex fp bktCount idxA
        bkts = buckets cFilter

        -- TODO Try to make this typesafe
        bA = fromMaybe emptyBucket $ toIndex bktCount idxA `IM.lookup` bkts
        bB = fromMaybe emptyBucket $ toIndex bktCount idxB `IM.lookup` bkts

        -- fp `elem` [a,b,c,d] is simpler, but it allocates an additional list unnecessarily
        inBucket fp bucket =
            fp == getCell bucket 0 ||
            fp == getCell bucket 1 ||
            fp == getCell bucket 2 ||
            fp == getCell bucket 3



-- | Deletes a single occurance of 'a' from the 'Filter'. It first checks for a value
-- in the primary index, then in the secondary index.
--
-- Deleting an element not in the Cuckoo Filter is a noop and returns the filter unchanged.
--
-- /O(1)/
delete :: (Hashable a) =>
    Filter a
    -> a
    -> Filter a
delete cFilt@(F {numBuckets, buckets}) a
    | not $ member a cFilt = cFilt
    | otherwise = let
        bucketA = fromMaybe emptyBucket $ toIndex numBuckets idxA `IM.lookup` buckets
        bucketB = fromMaybe emptyBucket $ toIndex numBuckets idxB `IM.lookup` buckets
        (removedFromA, bucketA') = removeFromBucket bucketA
        (_, bucketB') = removeFromBucket bucketB
        in if removedFromA
           then cFilt {buckets = IM.insert (toIndex numBuckets idxA) bucketA' buckets}
           else cFilt {buckets = IM.insert (toIndex numBuckets idxB) bucketB' buckets}
    where
        fp = makeFingerprint a
        idxA = primaryIndex a numBuckets
        idxB = secondaryIndex fp numBuckets idxA
        -- TODO just use Control.Arrow
        matchesFP _ bucket = (fp == getCell bucket 0,
                              fp == getCell bucket 1,
                              fp == getCell bucket 2,
                              fp == getCell bucket 3)
        removeFromBucket bucket = let
            (_, bucket') = replaceInBucket (FP 0) matchesFP bucket
            in (bucket /= bucket', bucket')