{-# LANGUAGE CPP #-}

module Main ( main ) where

import           Control.DeepSeq
import           Control.Exception (bracket)
import           Control.Monad
import           Control.Monad.Class.MonadST
import           Control.Monad.Primitive
import           Control.Monad.ST.Strict (ST, runST)
import           Control.RefCount
import           Data.Bits ((.&.))
import           Data.BloomFilter.Blocked (Bloom)
import qualified Data.BloomFilter.Blocked as Bloom
import           Data.Time
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Merge as Merge
import qualified Data.Vector.Generic.Mutable as VGM
import qualified Data.Vector.Mutable as VM
import qualified Data.Vector.Primitive as VP
import qualified Data.Vector.Unboxed.Mutable as VUM
import           Database.LSMTree.Extras.Orphans ()
import           Database.LSMTree.Extras.UTxO
import           Database.LSMTree.Internal.Arena (ArenaManager, newArenaManager,
                     withArena)
import           Database.LSMTree.Internal.Entry (Entry (Insert), NumEntries (..))
import           Database.LSMTree.Internal.Index (Index)
import qualified Database.LSMTree.Internal.Index as Index (IndexType (Compact))
import           Database.LSMTree.Internal.Lookup
import           Database.LSMTree.Internal.Paths (RunFsPaths (RunFsPaths))
import           Database.LSMTree.Internal.Run (Run)
import qualified Database.LSMTree.Internal.Run as Run
import           Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
import           Database.LSMTree.Internal.RunBuilder (RunParams (..))
import qualified Database.LSMTree.Internal.RunBuilder as RunBuilder
import           Database.LSMTree.Internal.RunNumber
import           Database.LSMTree.Internal.Serialise (ResolveSerialisedValue,
                     SerialisedKey, serialiseKey, serialiseValue)
import qualified Database.LSMTree.Internal.WriteBuffer as WB
import qualified Database.LSMTree.Internal.WriteBufferBlobs as WBB
import           Debug.Trace (traceMarkerIO)
import           GHC.Stats
import           Numeric
import           System.Environment (getArgs)
import           System.Exit (exitFailure)
import qualified System.FS.API as FS
import qualified System.FS.BlockIO.API as FS
import qualified System.FS.BlockIO.IO as FS
import qualified System.FS.IO as FS
import           System.IO
import           System.IO.Unsafe (unsafePerformIO)
import           System.Mem (performMajorGC)
import           System.Random

main :: IO ()
main = do
    hSetBuffering stdout NoBuffering
    args <- getArgs
    case args of
      [arg] | let cache = read arg
        -> benchmarks (if cache then Run.CacheRunData else Run.NoCacheRunData)
      _ -> do
        putStrLn "Wrong usage: pass in True or False for the caching flag."
        exitFailure

-- | The number of entries in the smallest LSM runs is @2^benchmarkSizeBase@.
--
-- This is currently set to however many UTxO entries fit into 2MB of disk
-- pages.
--
-- >>> benchmarkSizeBase
-- 14
benchmarkSizeBase :: SizeBase
benchmarkSizeBase =
    floor @Double $
      logBase 2
        (fromIntegral @Int
          ((2 * 1024 * 1024 `div` 4096) * floor @Double numEntriesFitInPage))

-- | The number of lookups to do. This has to be smaller than the total size of
-- all the runs (otherwise we will not get true positive probes, which is part
-- of the point of this benchmark).
--
-- The benchmark might do slightly more lookups, because it generates batches
-- of keys of size 'benchmarkGenBatchSize'. This shouldn't affect the results
-- significantly, unless 'benchmarkNumLookups' approaches
-- 'benchmarkGenBatchSize'.
benchmarkNumLookups :: Int
benchmarkNumLookups = 1_000_000 -- 10 * the stretch target
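-- A quick worked check (not used by the benchmark) of the batching overshoot
-- described above: the benchmark loops below consume lookups in batches of
-- 'benchmarkGenBatchSize', so the actual number of lookups performed is
-- 'benchmarkNumLookups' rounded up to the next multiple of the batch size.
--
-- >>> let nbatches = (benchmarkNumLookups + benchmarkGenBatchSize - 1) `div` benchmarkGenBatchSize
-- >>> nbatches
-- 3907
-- >>> nbatches * benchmarkGenBatchSize
-- 1000192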
-- | The size of batches as they are generated by the benchmark.
benchmarkGenBatchSize :: Int
benchmarkGenBatchSize = 256

-- | The bloom filter allocation, in bits per entry.
benchmarkNumBitsPerEntry :: Double
benchmarkNumBitsPerEntry = 10

-- | Value resolution for lookups: simply keep the first value.
benchmarkResolveSerialisedValue :: ResolveSerialisedValue
benchmarkResolveSerialisedValue = const

-- >>> pageBits
-- 32768
pageBits :: Int
pageBits = 4096 * 8 -- page size in bits

-- >>> unusedPageBits
-- 32688
unusedPageBits :: Int
unusedPageBits = pageBits -- page size in bits
               - 8 * 8    -- directory
               - 16       -- last value offset

-- >>> entryBits
-- 752
entryBits :: Int
entryBits = 34 * 8 -- key size
          + 60 * 8 -- value size

-- >>> entryBitsWithOverhead
-- 787
entryBitsWithOverhead :: Int
entryBitsWithOverhead = entryBits -- key and value size
                      + 1         -- blobref indicator
                      + 2         -- operation type
                      + 16        -- key offset
                      + 16        -- value offset

-- >>> numEntriesFitInPage
-- 41.53494282083863
numEntriesFitInPage :: Fractional a => a
numEntriesFitInPage =
    fromIntegral unusedPageBits / fromIntegral entryBitsWithOverhead
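-- Cross-checking 'benchmarkSizeBase' against the page layout above (a sanity
-- check only, mirroring the doctests): 2MB of 4096-byte pages is 512 pages,
-- each of which fits 41 whole entries, and @2^14 <= 512 * 41 = 20992 < 2^15@.
--
-- >>> (2 * 1024 * 1024 `div` 4096) * floor @Double numEntriesFitInPage
-- 20992
-- >>> floor @Double (logBase 2 20992) == benchmarkSizeBase
-- True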
-- | The bloom filter salt used throughout the benchmark.
benchSalt :: Bloom.Salt
benchSalt = 4

benchmarks :: Run.RunDataCaching -> IO ()
benchmarks !caching = withFS $ \hfs hbio refCtx -> do
#ifdef NO_IGNORE_ASSERTS
    putStrLn "WARNING: Benchmarking in debug mode."
    putStrLn "         To benchmark in release mode, pass:"
    putStrLn "         --project-file=cabal.project.release"
#endif
    arenaManager <- newArenaManager
    enabled <- getRTSStatsEnabled
    unless enabled $ fail "Need RTS +T statistics enabled"
    let runSizes = lsmStyleRuns benchmarkSizeBase
    putStrLn "Precomputed run stats:"
    putStrLn "(numEntries, sizeFactor)"
    mapM_ print runSizes
    putStrLn $ "total number of entries:\t " ++ show (totalNumEntries runSizes)
    putStrLn $ "total number of key lookups:\t " ++ show benchmarkNumLookups

    unless (totalNumEntriesSanityCheck benchmarkSizeBase runSizes) $
      fail "totalNumEntriesSanityCheck failed"
    unless (totalNumEntries runSizes >= benchmarkNumLookups) $
      fail "number of key lookups is more than number of entries"

    traceMarkerIO "Generating runs"
    putStr ""
    -- This initial key RNG is used for both generating the runs and generating
    -- lookup batches. This ensures that we only perform true positive lookups.
    -- Also, lookupsEnv shuffles the generated keys into the different runs,
    -- such that the generated lookups access the runs in random places instead
    -- of sequentially.
    let keyRng0 = mkStdGen 17
    (!runs, !blooms, !indexes, !handles) <-
      lookupsEnv runSizes keyRng0 hfs hbio refCtx caching
    putStrLn ""

    traceMarkerIO "Computing statistics for generated runs"
    let numEntries   = V.map Run.size runs
        numPages     = V.map Run.sizeInPages runs
        nhashes      = V.map (Bloom.sizeHashes . Bloom.size) blooms
        bitsPerEntry = V.zipWith
          (\b (NumEntries n) ->
            fromIntegral (Bloom.sizeBits (Bloom.size b)) / fromIntegral n :: Double)
          blooms
          numEntries
        stats = V.zip4 numEntries numPages nhashes bitsPerEntry
    putStrLn "Actual stats for generated runs:"
    putStrLn "(numEntries, numPages, numHashes, bits per entry)"
    mapM_ print stats

    _ <- putStr "Pausing. Drop caches now! When ready, press enter." >> getLine

    traceMarkerIO "Running benchmark"
    putStrLn ""
    bgenKeyBatches@(x1, y1) <- benchmark "benchGenKeyBatches"
      "Calculate batches of keys. This serves as a baseline for later benchmark runs to compare against."
      (pure . benchGenKeyBatches blooms keyRng0)
      benchmarkNumLookups
      (0, 0)
    _bbloomQueries@(x2, y2) <- benchmark "benchBloomQueries"
      "Calculate batches of keys, and perform bloom queries for each batch. Net time/allocation is the result of subtracting the cost of benchGenKeyBatches."
      (pure . benchBloomQueries blooms keyRng0)
      benchmarkNumLookups
      bgenKeyBatches
    _bindexSearches <- benchmark "benchIndexSearches"
      "Calculate batches of keys, perform bloom queries for each batch, and perform index searches for positively queried keys in each batch. Net time/allocation is the result of subtracting the cost of benchGenKeyBatches and benchBloomQueries."
      (benchIndexSearches arenaManager blooms indexes handles keyRng0)
      benchmarkNumLookups
      (x1 + x2, y1 + y2)
    _bprepLookups <- benchmark "benchPrepLookups"
      "Calculate batches of keys, and prepare lookups for each batch. This is roughly doing the same amount of work as benchBloomQueries and benchIndexSearches. Net time/allocation is the result of subtracting the cost of benchGenKeyBatches."
      (benchPrepLookups arenaManager blooms indexes handles keyRng0)
      benchmarkNumLookups
      bgenKeyBatches
    _blookupsIO <- benchmark "benchLookupsIO"
      "Calculate batches of keys, and perform disk lookups for each batch. This is roughly doing the same as benchPrepLookups, but also performing the disk I/O and resolving values. Net time/allocation is the result of subtracting the cost of benchGenKeyBatches."
      (\n -> do
        let wb_unused = WB.empty
        bracket (WBB.new hfs refCtx (FS.mkFsPath ["wbblobs_unused"])) releaseRef $
          \wbblobs_unused ->
            benchLookupsIO hbio arenaManager benchmarkResolveSerialisedValue
              wb_unused wbblobs_unused runs blooms indexes handles keyRng0 n)
      benchmarkNumLookups
      bgenKeyBatches
    -- TODO: consider adding benchmarks that also use the write buffer

    traceMarkerIO "Cleaning up"
    putStrLn "Cleaning up"
    V.mapM_ releaseRef runs

    traceMarkerIO "Computing statistics for prepLookups results"
    putStr ""
    let !x = classifyLookups blooms keyRng0 benchmarkNumLookups
    putStrLn ""
    putStrLn "Statistics for prepLookups:"
    putStrLn "(positives, fpr, tp, fn, fp, tn)"
    print x

type Alloc = Int

-- | Run one benchmark phase: perform a major GC, measure wall-clock time and
-- RTS allocation around @action n@, and report both the raw totals and the net
-- cost after subtracting a previously measured baseline.
benchmark :: String
          -> String
          -> (Int -> IO ())
          -> Int
          -> (NominalDiffTime, Alloc)
          -> IO (NominalDiffTime, Alloc)
benchmark name description action n (subtractTime, subtractAlloc) = do
    traceMarkerIO ("Benchmarking " ++ name)
    putStrLn $ "Benchmarking " ++ name ++ " ... "
    putStrLn description
    performMajorGC
    allocBefore <- allocated_bytes <$> getRTSStats
    timeBefore  <- getCurrentTime
    () <- action n
    timeAfter   <- getCurrentTime
    performMajorGC
    allocAfter  <- allocated_bytes <$> getRTSStats
    putStrLn "Finished."
    let allocTotal :: Alloc
        timeTotal  = timeAfter `diffUTCTime` timeBefore
        allocTotal = fromIntegral allocAfter - fromIntegral allocBefore
        timeNet    = timeTotal - subtractTime
        allocNet   = allocTotal - subtractAlloc

        timePerKey, allocPerKey :: Double
        timePerKey  = realToFrac timeNet / fromIntegral n
        allocPerKey = fromIntegral allocNet / fromIntegral n
    let printStat :: String -> Double -> String -> IO ()
        printStat label v unit =
          putStrLn $ label ++ showGFloat (Just 2) v (' ' : unit)
    printStat "Time total:        " (realToFrac timeTotal)    "seconds"
    printStat "Alloc total:       " (fromIntegral allocTotal) "bytes"
    printStat "Time net:          " (realToFrac timeNet)      "seconds"
    printStat "Alloc net:         " (fromIntegral allocNet)   "bytes"
    printStat "Time net per key:  " timePerKey                "seconds"
    printStat "Alloc net per key: " allocPerKey               "bytes"
    putStrLn ""
    pure (timeNet, allocNet)
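-- A worked example of the subtraction scheme in 'benchmark' (illustrative
-- numbers only): if the 'benchGenKeyBatches' baseline measures 1.20s in total
-- and a 'benchBloomQueries' run measures 1.95s, then the net cost attributed
-- to the bloom queries alone is 0.75s, i.e. roughly 750 nanoseconds per key
-- over the 1_000_192 lookups actually performed.
--
-- >>> showGFloat (Just 2) ((1.95 - 1.20) / 1_000_192) " seconds per key"
-- "7.50e-7 seconds per key"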
-- | (numEntries, sizeFactor)
type RunSizeInfo = (Int, Int)
type SizeBase    = Int

-- | Calculate the sizes of a realistic LSM style set of runs. This uses base
-- 4, with six disk levels, using tiering for the internal levels and leveling
-- for the final (biggest) level.
--
-- Due to the incremental merging, each level actually has (in the worst case)
-- 2x the number of runs, hence 8 per level for tiering levels.
lsmStyleRuns :: SizeBase -> [RunSizeInfo]
lsmStyleRuns l1 =
       replicate 8 (2^(l1+ 0),    1) -- 8 runs at level 1 (tiering)
    ++ replicate 8 (2^(l1+ 2),    4) -- 8 runs at level 2 (tiering)
    ++ replicate 8 (2^(l1+ 4),   16) -- 8 runs at level 3 (tiering)
    ++ replicate 8 (2^(l1+ 6),   64) -- 8 runs at level 4 (tiering)
    ++ replicate 8 (2^(l1+ 8),  256) -- 8 runs at level 5 (tiering)
    ++ [          (2^(l1+12), 4096)] -- 1 run  at level 6 (leveling)

-- | The total number of entries.
--
-- This should be roughly @100_000_000@ when we pass in 'benchmarkSizeBase'.
--
-- >>> totalNumEntries (lsmStyleRuns benchmarkSizeBase)
-- 111804416
totalNumEntries :: [RunSizeInfo] -> Int
totalNumEntries runSizes = sum [ numEntries | (numEntries, _) <- runSizes ]

totalNumEntriesSanityCheck :: SizeBase -> [RunSizeInfo] -> Bool
totalNumEntriesSanityCheck l1 runSizes =
    totalNumEntries runSizes
 == sum [ 2^l1 * sizeFactor | (_, sizeFactor) <- runSizes ]
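-- A direct check that the precomputed run sizes are internally consistent.
-- This mirrors the check performed at startup in 'benchmarks':
--
-- >>> totalNumEntriesSanityCheck benchmarkSizeBase (lsmStyleRuns benchmarkSizeBase)
-- True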
withFS ::
     (FS.HasFS IO FS.HandleIO -> FS.HasBlockIO IO FS.HandleIO -> RefCtx -> IO a)
  -> IO a
withFS action =
    withRefCtx $ \refCtx ->
    FS.withIOHasBlockIO (FS.MountPoint "_bench_lookups") FS.defaultIOCtxParams $
      \hfs hbio -> do
        exists <- FS.doesDirectoryExist hfs (FS.mkFsPath [""])
        unless exists $ error "_bench_lookups directory does not exist"
        action hfs hbio refCtx

-- | Input environment for benchmarking lookup functions.
--
-- The idea here is to have a collection of runs corresponding to the sizes
-- used in a largeish LSM. In particular, the sizes are in increasing powers of
-- 4. The keys in the runs are non-overlapping, and each lookup will be a true
-- positive in only one run. Thus most lookups will be true negatives.
--
-- The goal is to benchmark the critical path of performing asynchronous
-- lookups serially:
--
-- * where the same key is being looked up in many runs,
-- * with a true positive lookup in only one run,
-- * with true negative lookups in the other runs, and
-- * with false positive lookups in a fraction of the runs according to the
--   bloom filters' false positive rates.
lookupsEnv ::
     [RunSizeInfo]
  -> StdGen -- ^ Key RNG
  -> FS.HasFS IO FS.HandleIO
  -> FS.HasBlockIO IO FS.HandleIO
  -> RefCtx
  -> Run.RunDataCaching
  -> IO ( V.Vector (Ref (Run IO FS.HandleIO))
        , V.Vector (Bloom SerialisedKey)
        , V.Vector Index
        , V.Vector (FS.Handle FS.HandleIO)
        )
lookupsEnv runSizes keyRng0 hfs hbio refCtx caching = do
    -- create the vector of initial keys
    (mvec :: VUM.MVector RealWorld UTxOKey) <-
      VUM.unsafeNew (totalNumEntries runSizes)
    !keyRng1 <- vectorOfUniforms mvec keyRng0
    -- we reuse keyRng0 to generate batches of lookups, so by shuffling the
    -- vector we ensure that these batches of lookups will do random disk
    -- access
    !_ <- shuffle mvec keyRng1

    -- create the runs
    rbs <- sequence
      [ RunBuilder.new hfs hbio benchSalt
          RunParams {
            runParamCaching = caching,
            runParamAlloc   = RunAllocFixed benchmarkNumBitsPerEntry,
            runParamIndex   = Index.Compact
          }
          (RunFsPaths (FS.mkFsPath []) (RunNumber i))
          (NumEntries numEntries)
      | ((numEntries, _), i) <- zip runSizes [0..] ]

    -- fill the runs
    putStr "addKeyOp"
    let zero = serialiseValue zeroUTxOValue
    foldM_
      (\ !i (!rb, !n) -> do
          let !mvecLocal = VUM.unsafeSlice i n mvec
          Merge.sort mvecLocal
          flip VUM.imapM_ mvecLocal $ \ !j !k -> do
            -- progress
            when (j .&. 0xFFFF == 0) (putStr ".")
            void $ RunBuilder.addKeyOp rb (serialiseKey k) (Insert zero)
          pure (i + n))
      0
      (zip rbs (fmap fst runSizes))
    putStr "DONE"

    -- return runs
    runs <- V.fromList <$> mapM (Run.fromBuilder refCtx) rbs
    let blooms  = V.map (\(DeRef r) -> Run.runFilter   r) runs
        indexes = V.map (\(DeRef r) -> Run.runIndex    r) runs
        handles = V.map (\(DeRef r) -> Run.runKOpsFile r) runs
    pure $!! (runs, blooms, indexes, handles)

genLookupBatch :: StdGen -> Int -> (V.Vector SerialisedKey, StdGen)
genLookupBatch !rng0 !n0
  | n0 <= 0   = error "genLookupBatch: batch size must be positive"
  | otherwise = runST $ do
      mres <- VM.unsafeNew n0
      go rng0 0 mres
  where
    go :: StdGen -> Int -> VM.MVector s SerialisedKey
       -> ST s (V.Vector SerialisedKey, StdGen)
    go !rng !i !mres
      | n0 == i = do
          !res <- V.unsafeFreeze mres
          pure (res, rng)
      | otherwise = do
          let (!k, !rng') = uniform @UTxOKey @StdGen rng
              !sk = serialiseKey k
          VM.write mres i $! sk
          go rng' (i+1) mres
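-- A small sanity check for 'genLookupBatch' (illustrative only): a batch has
-- exactly the requested number of serialised keys, and the result is
-- deterministic in the RNG state passed in.
--
-- >>> V.length (fst (genLookupBatch (mkStdGen 17) benchmarkGenBatchSize))
-- 256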
-- | This gives us the baseline cost of calculating batches of keys. Note that
-- the vector of bloom filters is unused here.
benchGenKeyBatches ::
     V.Vector (Bloom SerialisedKey)
  -> StdGen
  -> Int
  -> ()
benchGenKeyBatches !bs !keyRng !n
  | n <= 0    = ()
  | otherwise =
      let (!_ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
      in  benchGenKeyBatches bs keyRng' (n - benchmarkGenBatchSize)

-- | This gives us the combined cost of calculating batches of keys, and
-- performing bloom queries for each batch.
benchBloomQueries ::
     V.Vector (Bloom SerialisedKey)
  -> StdGen
  -> Int
  -> ()
benchBloomQueries !bs !keyRng !n
  | n <= 0    = ()
  | otherwise =
      let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
      in  bloomQueries benchSalt bs ks `seq`
          benchBloomQueries bs keyRng' (n - benchmarkGenBatchSize)

-- | This gives us the combined cost of calculating batches of keys, performing
-- bloom queries for each batch, and performing index searches for each batch.
benchIndexSearches ::
     ArenaManager RealWorld
  -> V.Vector (Bloom SerialisedKey)
  -> V.Vector Index
  -> V.Vector (FS.Handle h)
  -> StdGen
  -> Int
  -> IO ()
benchIndexSearches !arenaManager !bs !ics !hs !keyRng !n
  | n <= 0    = pure ()
  | otherwise = do
      let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
          !rkixs = bloomQueries benchSalt bs ks
      !_ioops <- withArena arenaManager $ \arena ->
                   stToIO $ indexSearches arena ics hs ks rkixs
      benchIndexSearches arenaManager bs ics hs keyRng' (n - benchmarkGenBatchSize)

-- | This gives us the combined cost of calculating batches of keys, and
-- preparing lookups for each batch.
benchPrepLookups ::
     ArenaManager RealWorld
  -> V.Vector (Bloom SerialisedKey)
  -> V.Vector Index
  -> V.Vector (FS.Handle h)
  -> StdGen
  -> Int
  -> IO ()
benchPrepLookups !arenaManager !bs !ics !hs !keyRng !n
  | n <= 0    = pure ()
  | otherwise = do
      let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
      (!_rkixs, !_ioops) <- withArena arenaManager $ \arena ->
                              stToIO $ prepLookups arena benchSalt bs ics hs ks
      benchPrepLookups arenaManager bs ics hs keyRng' (n - benchmarkGenBatchSize)

-- | This gives us the combined cost of calculating batches of keys, and
-- performing disk lookups for each batch.
benchLookupsIO ::
     FS.HasBlockIO IO h
  -> ArenaManager RealWorld
  -> ResolveSerialisedValue
  -> WB.WriteBuffer
  -> Ref (WBB.WriteBufferBlobs IO h)
  -> V.Vector (Ref (Run IO h))
  -> V.Vector (Bloom SerialisedKey)
  -> V.Vector Index
  -> V.Vector (FS.Handle h)
  -> StdGen
  -> Int
  -> IO ()
benchLookupsIO !hbio !arenaManager !resolve !wb !wbblobs !rs !bs !ics !hs =
    go
  where
    go !keyRng !n
      | n <= 0    = pure ()
      | otherwise = do
          let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
          !_ <- lookupsIOWithWriteBuffer
                  hbio arenaManager resolve benchSalt wb wbblobs rs bs ics hs ks
          go keyRng' (n - benchmarkGenBatchSize)

{-------------------------------------------------------------------------------
  Utilities
-------------------------------------------------------------------------------}

classifyLookups ::
     V.Vector (Bloom SerialisedKey)
  -> StdGen
  -> Int
  -> ( Int, Double -- (all) positives, fpr
     , Int, Int    -- true positives, false negatives
     , Int, Int    -- false positives, true negatives
     )
classifyLookups !bs !keyRng0 !n0 =
    let !positives = unsafePerformIO (putStr "classifyLookups") `seq`
                     loop 0 keyRng0 n0
        !tp  = n0
        !fn  = 0
        !fp  = positives - tp
        !tn  = (V.length bs - 1) * n0
        !fpr = fromIntegral fp / (fromIntegral fp + fromIntegral tn)
    in  positives `seq` (positives, fpr, tp, fn, fp, tn)
  where
    loop !positives !keyRng !n
      | n <= 0 = unsafePerformIO (putStr "DONE") `seq` positives
      | otherwise =
          unsafePerformIO (putStr ".") `seq`
          let (!ks, !keyRng') = genLookupBatch keyRng benchmarkGenBatchSize
              !rkixs = bloomQueries benchSalt bs ks
          in  loop (positives + VP.length rkixs) keyRng' (n - benchmarkGenBatchSize)

-- | Fill a mutable vector with uniformly random values.
vectorOfUniforms ::
     (PrimMonad m, VGM.MVector v a, RandomGen g, Uniform a)
  => v (PrimState m) a
  -> g
  -> m g
vectorOfUniforms !vec !g0 = do
    unsafeIOToPrim $ putStr "vectorOfUniforms"
    !g0' <- loop 0 g0
    unsafeIOToPrim $ putStr "DONE"
    pure g0'
  where
    !n = VGM.length vec
    loop !i !g
      -- stop only once /all/ n elements have been written; stopping at n-1
      -- would leave the final element of the vector uninitialised
      | i == n    = pure g
      | otherwise = do
          when (i .&. 0xFFFF == 0) (unsafeIOToPrim $ putStr ".")
          let (!x, !g') = uniform g
          VGM.unsafeWrite vec i x
          loop (i+1) g'

-- https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle
shuffle ::
     (PrimMonad m, VGM.MVector v a, RandomGen g)
  => v (PrimState m) a
  -> g
  -> m g
shuffle !xs !g0 = do
    unsafeIOToPrim $ putStr "shuffle"
    !g0' <- loop 0 g0
    unsafeIOToPrim $ putStr "DONE"
    pure g0'
  where
    !n = VGM.length xs
    loop !i !g
      -- the loop can stop at n-1: the final element could only swap with
      -- itself
      | i == n-1  = pure g
      | otherwise = do
          when (i .&. 0xFFFF == 0) (unsafeIOToPrim $ putStr ".")
          let (!j, !g') = randomR (i, n-1) g
          VGM.swap xs i j
          loop (i+1) g'
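-- | A quick permutation check for 'shuffle' (an illustrative sketch, not used
-- by the benchmark): shuffling must neither lose nor duplicate elements, so
-- sorting the shuffled vector should recover the original ascending sequence.
-- Note that 'shuffle' prints progress dots, just as it does in the benchmark.
_shuffleIsPermutation :: IO Bool
_shuffleIsPermutation = do
    vec <- V.thaw (V.enumFromN (0 :: Int) 1000)
    _   <- shuffle vec (mkStdGen 42)
    Merge.sort vec
    xs  <- V.freeze vec
    pure (xs == V.enumFromN 0 1000)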