{-# LANGUAGE ParallelListComp #-}
module Main (main) where

import qualified Data.BloomFilter as B (BitsPerEntry, FPR, Hashable, Salt)
import qualified Data.BloomFilter.Blocked as B.Blocked
import qualified Data.BloomFilter.Classic as B.Classic

import           Control.Parallel.Strategies
import           Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import           Data.List (unfoldr)
import           Math.Regression.Simple
import           System.Environment (getArgs)
import           System.Exit (exitSuccess)
import           System.IO
import           System.Random

import           Prelude hiding (elem)

-- | Write out data files used by gnuplot fpr.plot
main :: IO ()
main = do
    hSetBuffering stdout NoBuffering --for progress reporting

    args <- getArgs
    case args of
      ["Generate"] -> main_generateData
      ["Regression"] -> main_regression
      _   -> do
        putStrLn "Usage: fpr-calc [Generate|Regression]"
        exitSuccess

main_regression :: IO ()
main_regression = do
    s <- readFile "plots/fpr.blocked.gnuplot.data"
    let parseLine l = case words l of
          [w_xs_blocked, _, w_ys_blocked_actual] ->
            ( read w_xs_blocked, read w_ys_blocked_actual )
          _ -> error "failed parse"
        xs_blocked, ys_blocked_actual :: [Double]
        (xs_blocked, ys_blocked_actual) = unzip $ fmap parseLine $ lines s

    let regressionData     :: [(Double, Double)]
        regressionData      = zip xs_blocked
                                  (map (negate . log) ys_blocked_actual)
        regressionBitsToFPR = quadraticFit (\(x,y)->(x,y)) regressionData
        regressionFPRToBits = quadraticFit (\(x,y)->(y,x)) regressionData
    putStrLn ""
    putStrLn "Blocked bloom filter quadratic regressions:"
    putStrLn "bits independent, FPR dependent:"
    print regressionBitsToFPR
    putStrLn ""
    putStrLn "FPR independent, bits dependent:"
    print regressionFPRToBits

main_generateData :: IO ()
main_generateData = do
    withFile "plots/fpr.classic.gnuplot.data" WriteMode $ \h -> do
      hSetBuffering h LineBuffering --for incremental output
      mapM_ (\l -> hPutStrLn h l >> putChar '.') $
        [ unwords [show bitsperkey, show y1, show y2]
        | (bitsperkey, _) <- xs_classic
        | y1              <- ys_classic_calc
        | y2              <- ys_classic_actual
        ]
    putStrLn "Wrote plots/fpr.classic.gnuplot.data"

    withFile "plots/fpr.blocked.gnuplot.data" WriteMode $ \h -> do
      hSetBuffering h LineBuffering --for incremental output
      mapM_ (\l -> hPutStrLn h l >> putChar '.') $
        [ unwords [show bitsperkey, show y1, show y2]
        | (bitsperkey, _) <- xs_blocked
        | y1              <- ys_blocked_calc
        | y2              <- ys_blocked_actual
        ]
    putStrLn "Wrote plots/fpr.blocked.gnuplot.data"
  where
    -- x axis values
    xs_classic =
      [ (bitsperkey, g)
      | bitsperkey <- [2,2.3..20]
      , g          <- mkStdGen <$> [1..3]
      ]
      -- We use fewer points for classic, as it's slower and there's less need.

    xs_blocked =
      [ (bitsperkey, g)
      | bitsperkey <- [2,2.2..24]
      , g          <- mkStdGen <$> [1..9]
      ]

    ys_classic_calc, ys_classic_actual,
      ys_blocked_calc, ys_blocked_actual :: [Double]

    ys_classic_calc = ys_calc classicBloomImpl xs_classic
    ys_blocked_calc = ys_calc blockedBloomImpl xs_blocked

    ys_classic_actual = ys_actual classicBloomImpl xs_classic
    ys_blocked_actual = ys_actual blockedBloomImpl xs_blocked

    ys_calc :: BloomImpl b p s -> [(Double, StdGen)] -> [Double]
    ys_calc BloomImpl{..} xs =
      [ fpr
      | (bitsperkey, _) <- xs
      , let policy = policyForBits bitsperkey
            fpr    = policyFPR policy
      ]

    ys_actual :: BloomImpl b p s -> [(Double, StdGen)] -> [Double]
    ys_actual impl@BloomImpl{..} xs =
      withStrategy (parList rseq) -- eval in parallel
      [ fpr
      | (bitsperkey, g) <- xs
      , let policy   = policyForBits bitsperkey
            fpr_est  = policyFPR policy
            nentries = round (1000 * recip fpr_est)
            fpr      = actualFalsePositiveRate impl policy nentries g
      ]
{-
    -- fpr values in the range 1e-1 .. 1e-6
    ys = [ exp (-log_fpr)
         | log_fpr <- [2.3,2.4 .. 13.8] ]

    xs_classic_calc = xs_calc classicBloomImpl
    xs_blocked_calc = xs_calc blockedBloomImpl

    xs_calc BloomImpl{..} =
      [ bits
      | fpr <- ys
      , let policy = policyForFPR fpr
            bits   = policyBits policy
      ]
-}

actualFalsePositiveRate :: BloomImpl bloom policy size
                        -> policy -> Int -> StdGen -> Double
actualFalsePositiveRate bloomimpl policy n g0 =
    fromIntegral (countFalsePositives bloomimpl policy n g0)
  / fromIntegral n

countFalsePositives :: forall bloom policy size. BloomImpl bloom policy size
                    -> policy -> Int -> StdGen -> Int
countFalsePositives BloomImpl{..} policy n g0 =
    let (!g01, !g02) = splitGen g0

        -- create a random salt
        (!salt, !g03) = uniform g02

        -- create a bloom filter from n elements from g0
        size  = sizeForPolicy policy n

        xs_b :: bloom Int
        !xs_b = unfold size salt nextElement (g01, 0)

        -- and a set, so we can make sure we don't count true positives
        xs_s :: IntSet
        !xs_s = IntSet.fromList (unfoldr nextElement (g01, 0))

        -- now for a different random sequence (that will mostly not overlap)
        -- count the number of false positives
     in length
          [ ()
          | y <- unfoldr nextElement (g03, 0)
          , y `elem` xs_b                -- Bloom filter reports positive
          , not (y `IntSet.member` xs_s) -- but it is not a true positive
          ]
  where
    nextElement :: (StdGen, Int) -> Maybe (Int, (StdGen, Int))
    nextElement (!g, !i)
      | i >= n    = Nothing
      | otherwise = Just (x, (g', i+1))
        where
          (!x, !g') = uniform g

data BloomImpl bloom policy size = BloomImpl {
       policyForBits :: B.BitsPerEntry -> policy,
       policyForFPR  :: B.FPR          -> policy,
       policyBits    :: policy -> B.BitsPerEntry,
       policyFPR     :: policy -> B.FPR,
       sizeForPolicy :: policy -> Int -> size,
       unfold        :: forall a b. B.Hashable a
                     => size -> B.Salt -> (b -> Maybe (a, b)) -> b -> bloom a,
       elem          :: forall a. B.Hashable a => a -> bloom a -> Bool
     }

classicBloomImpl :: BloomImpl B.Classic.Bloom B.Classic.BloomPolicy B.Classic.BloomSize
classicBloomImpl =
    BloomImpl {
       policyForBits = B.Classic.policyForBits,
       policyForFPR  = B.Classic.policyForFPR,
       policyBits    = B.Classic.policyBits,
       policyFPR     = B.Classic.policyFPR,
       sizeForPolicy = B.Classic.sizeForPolicy,
       unfold        = B.Classic.unfold,
       elem          = B.Classic.elem
    }

blockedBloomImpl :: BloomImpl B.Blocked.Bloom B.Blocked.BloomPolicy B.Blocked.BloomSize
blockedBloomImpl =
    BloomImpl {
       policyForBits = B.Blocked.policyForBits,
       policyForFPR  = B.Blocked.policyForFPR,
       policyBits    = B.Blocked.policyBits,
       policyFPR     = B.Blocked.policyFPR,
       sizeForPolicy = B.Blocked.sizeForPolicy,
       unfold        = B.Blocked.unfold,
       elem          = B.Blocked.elem
    }