{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DeriveAnyClass #-}

{-|
Module      : Data.CuckooFilter.Internal
Description : Internal functions and data types for Data.CuckooFilter
Copyright   : (c) Chris Coffey, 2018
License     : MIT
Maintainer  : chris@foldl.io
Stability   : experimental

This is the internal API and implemntation of 'Data.CuckooFilter'. It is subject to
change at any time and should not be used. Instead, use the exports from 'Data.CuckooFilter'.
-}

module Data.CuckooFilter.Internal (
    -- * Constructing a Cuckoo Filter
    Size(..),
    makeSize,
    CuckooFilter(..),

    -- * Fingerprints
    FingerPrint(..),
    emptyFP,
    makeFingerprint,

    -- * Working with indices
    Bucket(..),
    emptyBucket,
    Index(..),
    IndexA(..),
    IndexB(..),
    replaceInBucket,
    insertBucket,
    primaryIndex,
    secondaryIndex,
    kickedSecondaryIndex,

    -- ** Bucket Cells,
    getCell,
    setCell
) where

import Data.Aeson (ToJSON, FromJSON)
import Data.Bits (xor, (.&.), (.|.), shiftR, shiftL)
import Data.Foldable (foldl')
import qualified Data.IntMap.Strict as IM
import Data.Hashable (Hashable, hash)
import Data.Serialize (Serialize)
import Data.Word (Word32, Word8)
import GHC.Generics (Generic)
import Numeric.Natural (Natural)

-- | A low-level interface for working with cuckoo filter storage.
class Monad m => CuckooFilter filt m where
    -- | Create a new cuckoo filter of the specified size
    initialize :: Size -> m (filt a)

    -- | Return the number of buckets contained in the filter. This is distinct from the total size of the filter (size /4)
    bucketCount :: filt a -> m Natural

    -- | Write the new contents of a bucket to the storage
    writeBucket :: Int -> Bucket -> filt a -> m (filt a)

    -- | Read the contents of a bucket from the storage
    readBucket :: Int -> filt a -> m Bucket


-- | A non-zero natural number. Generally this is a power of two, although there's no hard requirement
-- for that given the current implementation.
newtype Size = Size Natural
    deriving (Show, Eq, Ord)
    deriving stock Generic
    deriving newtype (Serialize, ToJSON, FromJSON)
-- | Safely make a 'Size' or fail if a 0 is provided.
makeSize :: Natural -> Maybe Size
makeSize n
    | n == 0 = Nothing
    | otherwise = Just . Size $ fromIntegral n

class Index a where
    toIndex :: Natural -> a -> Int

-- | An Index represents the keys into buckets
newtype IndexA = IA Word32
    deriving (Show, Eq, Ord, Generic)
    deriving newtype (ToJSON, FromJSON, Hashable)
    deriving anyclass Serialize
instance Index IndexA where
    toIndex numBuckets (IA n) = fromIntegral n `mod` fromIntegral numBuckets

newtype IndexB = IB Word32
    deriving (Show, Eq, Ord, Generic)
    deriving newtype (ToJSON, FromJSON, Hashable)
    deriving anyclass Serialize
instance Index IndexB where
    toIndex numBuckets (IB n) = fromIntegral n `mod` fromIntegral numBuckets

-- | A FingerPrint is an 8 bit hash of a value
newtype FingerPrint = FP Word8
    deriving (Show, Eq, Ord, Generic)
    deriving newtype (ToJSON, FromJSON, Hashable)
    deriving anyclass Serialize
emptyFP :: FingerPrint
emptyFP = FP 0
-- | A Bucket is a statically sized list of four FingerPrints.
--
newtype Bucket = B Word32
    deriving (Show, Ord)
    deriving stock Generic
    deriving newtype (ToJSON, FromJSON, Eq)
    deriving anyclass Serialize
emptyBucket :: Bucket
emptyBucket = B 0

getCell ::
    Bucket
    -> Natural -- Really just 0-3. Is it worth creating a custom datatype for this?
    -> FingerPrint
{-# INLINE getCell #-}
getCell (B bucket) cellNumber =
    FP . fromIntegral $ (bucket .&. mask) `shiftR` offset
    where
        offset = (fromIntegral cellNumber) * 8
        mask = (255 :: Word32) `shiftL` offset

setCell ::
    Bucket
    -> Natural
    -> FingerPrint
    -> Bucket
{-# INLINE setCell #-}
setCell (B bucket) cellNumber (FP fp) =
    B $ zeroed .|. mask
    where
        offset = (fromIntegral cellNumber) * 8
        zeroed = (bucket .|. zeroMask) `xor` zeroMask
        zeroMask = (255 :: Word32) `shiftL` offset
        mask = (fromIntegral fp :: Word32) `shiftL` offset


--
-- Working with Buckets
--
insertBucket ::
    FingerPrint
    -> Bucket
    -> Maybe Bucket
insertBucket fp bucket =
    case (a,b,c,d) of
        (True, _, _, _) -> Just $ setCell bucket 0 fp
        (_, True, _, _) -> Just $ setCell bucket 1 fp
        (_, _, True, _) -> Just $ setCell bucket 2 fp
        (_, _, _, True) -> Just $ setCell bucket 3 fp
        _ -> Nothing
    where
        -- TODO factor out all of this duplicated code
        a = emptyFP == getCell bucket 0
        b = emptyFP == getCell bucket 1
        c = emptyFP == getCell bucket 2
        d = emptyFP == getCell bucket 3

replaceInBucket ::
    FingerPrint
    -> (FingerPrint -> Bucket -> (Bool, Bool, Bool, Bool)) -- ^ Bucket predicate
    -> Bucket -- existing bucket
    -> (FingerPrint, Bucket) -- Removed fingerprint and latest bucket state
replaceInBucket fp predicate bucket = let
    results = predicate fp bucket
    in case results of
        (True, _, _, _) -> (getCell bucket 0, setCell bucket 0 fp)
        (_, True, _, _) -> (getCell bucket 1, setCell bucket 1 fp)
        (_, _, True, _) -> (getCell bucket 2, setCell bucket 2 fp)
        (_, _, _, True) -> (getCell bucket 3, setCell bucket 3 fp)
        _ -> (fp, bucket)

--
-- Index and hashes
--

-- | hash a % 255. Fingerprints are 8 bits each, and completely opaque to the
-- lookup algorithm.
makeFingerprint :: Hashable a =>
    a
    -> FingerPrint
{-# INLINE makeFingerprint #-}
makeFingerprint a = FP . max 1 $  fromIntegral (abs $ hash a) `mod` 255

-- | (hash a) % numBuckets
primaryIndex :: Hashable a =>
    a
    -> Natural
    -> IndexA
{-# INLINE primaryIndex #-}
primaryIndex a numBuckets =
    IA . fromIntegral $ hash a

-- | (indexA `xor` hash fp) % numBuckets
secondaryIndex ::
    FingerPrint
    -> Natural
    -> IndexA
    -> IndexB
{-# INLINE secondaryIndex #-}
secondaryIndex fp numBuckets (IA primary) =
    IB (primary `xor` fpHash)
    where
        fpHash = fromIntegral $ hash fp

kickedSecondaryIndex ::
    FingerPrint
    -> Natural
    -> IndexB
    -> IndexB
{-# INLINE kickedSecondaryIndex #-}
kickedSecondaryIndex fp numBuckets (IB alt) =
    secondaryIndex fp numBuckets (IA alt)