{-# LANGUAGE CPP #-} {-# LANGUAGE DuplicateRecordFields #-} {-# LANGUAGE NondecreasingIndentation #-} {-# LANGUAGE OverloadedStrings #-} {-# OPTIONS_GHC -Wno-orphans #-} {- | Benchmarks for table unions of an LSM tree. Here is a summary of the table union benchmark. Note that /all/ function calls are made through the "Database.LSMTree.Simple" module's API. __Phase 1: Setup__ The benchmark will setup an initial set of @--table-count@ tables to be unioned together during "__Phase 2__." The size of each generated table is the same and is equal to @--initial-size@. Each created table has @--initial-size@ insertions operations performed on it before being written out to disk as a snapshot. The @--initial-size@ inserted keys in each table are randomly selected from the following range. Each key is unique, meaning that keys are randomly sampled from the range without replacement. \[ \left[\quad 0,\quad 2 * initialSize \quad\right) \] Additionally, the directory in which to isolate the benchmark environment is specified via the @--bench-dir@ command line option. __Phase 2: Measurement__ When generating measurements for the table unions, the benchmark will reload the snapshots of the tables generated in __Phase 1__ from disk. Subsequently, the tables will be "incrementally unioned" together. Once the tables have been loaded and the union initiated, serveral iterations of lookups will be performed. One iteration involves performing a @--batch-count@ number of batches of @--batch-size@ lookups each. We measure the time spent on running an iteration, and we compute how many lookups per second were performed during the iteration. First, 50 iterations are performed /without/ supplying any credits to the unioned table. This establishes a base-line performance picture. Iterations \( \left[ 0, 50 \right) \) measure lookups per seconds on the unioned table with \(100\%\) of the debt remaining. Subsequently, 100 more iterations are performed. Before each of these iterations, a fixed number of credits are supplied to the incremental union table. The series of measurements allows reasoning about table performance over time as the tables debt decreases (at a uniform rate). The number of credits supplied before each iteration is \(1%\) of the total starting debt. After 100 steps, \(100\%\) of the debt will be paid off. Iterations \( \left[ 50, 100 \right) \) measure lookups per second on the unioned table as the remaining debt decreases. Finally, 50 concluding iterations are performed. Since no debt is remaining, no credits are supplied. Rather, these measurements create a "post-payoff" performance picture. Iterations \( \left[ 150, 200 \right) \) measure lookups per seconds on to the unioned table with \(0\%\) of the debt remaining. __Results__ An informative gnuplot script and data file of the benchmark measurements is generated and placed in the @bench-unions@ directory. Run the following command in a shell to generate a PNG of the graph. @ cd bench-unions && gnuplot unions-bench.gnuplot && cd .. @ TODO: explain the baseline table TODO: explain the seed TODO: explain collisions analysis -} module Bench.Unions (main) where import Control.Applicative ((<**>)) import Control.Concurrent.Async (forConcurrently_) import Control.Monad (forM_, void, (>=>)) import Control.Monad.State.Strict (MonadState (..), runState) import qualified Data.ByteString.Short as BS import Data.Foldable (traverse_) import Data.IORef import qualified Data.List as List import Data.List.NonEmpty (NonEmpty (..)) import qualified Data.List.NonEmpty as NE import Data.Monoid import qualified Data.Primitive as P import qualified Data.Set as Set import qualified Data.Vector as V import Data.Word (Word64) import qualified Options.Applicative as O import Prelude hiding (lookup) import qualified System.Clock as Clock import System.Directory (createDirectoryIfMissing) import System.IO import System.Mem (performMajorGC) import qualified System.Random as Random import System.Random (StdGen) import Text.Printf (printf) import qualified Text.Read as Read import Database.LSMTree.Extras (groupsOfN) import qualified Database.LSMTree.Extras.Random as Random import qualified Database.LSMTree.Simple as LSM ------------------------------------------------------------------------------- -- Constant Values ------------------------------------------------------------------------------- baselineTableID :: Int baselineTableID = 0 baselineTableName :: LSM.SnapshotName baselineTableName = makeTableName baselineTableID defaultBenchDir :: FilePath defaultBenchDir = "_bench_unions" defaultInitialSize :: Int defaultInitialSize = 1_000_000 defaultTableCount :: Int defaultTableCount = 10 -- | The default seed is the first 20 digits of Pi. defaultSeed :: Int defaultSeed = 1415926535897932384 ------------------------------------------------------------------------------- -- Keys and values ------------------------------------------------------------------------------- type K = BS.ShortByteString type V = BS.ShortByteString label :: LSM.SnapshotLabel label = LSM.SnapshotLabel "K V B" -- | We generate 34 byte keys by using a PRNG to extend a word64 to 32 bytes -- and then appending two constant bytes. This corresponds relatively closely -- to UTxO keys, which are 32 byte cryptographic hashes, followed by two bytes -- which are typically the 16bit value 0 or 1 (a transaction output index). -- makeKey :: Word64 -> K makeKey seed = case P.runPrimArray $ do v <- P.newPrimArray 5 let g0 = Random.mkStdGen (fromIntegral seed) let (!w0, !g1) = Random.uniform g0 P.writePrimArray v 0 w0 let (!w1, !g2) = Random.uniform g1 P.writePrimArray v 1 w1 let (!w2, !g3) = Random.uniform g2 P.writePrimArray v 2 w2 let (!w3, _g4) = Random.uniform g3 P.writePrimArray v 3 w3 P.writePrimArray v 4 0x3d3d3d3d3d3d3d3d -- ======== case v of P.MutablePrimArray mba -> do _ <- P.resizeMutableByteArray (P.MutableByteArray mba) 34 pure v of (P.PrimArray ba :: P.PrimArray Word64) -> byteArrayToSBS (P.ByteArray ba) -- | \( O(1) \) conversion. byteArrayToSBS :: P.ByteArray -> BS.ShortByteString #if MIN_VERSION_bytestring(0,12,0) byteArrayToSBS ba = BS.ShortByteString ba #else byteArrayToSBS (P.ByteArray ba) = BS.SBS ba #endif -- We use constant value. This shouldn't affect anything. theValue :: V theValue = BS.replicate 60 120 -- 'x' {-# NOINLINE theValue #-} ------------------------------------------------------------------------------- -- Options and commands ------------------------------------------------------------------------------- data GlobalOpts = GlobalOpts { rootDir :: !FilePath -- ^ Session directory. , tableCount :: !Int -- ^ Number of tables in the benchmark , initialSize :: !Int -- ^ Initial size of each table in the benchmark , seed :: !Int -- ^ The seed for the RNG for deterministic behavior, -- use system entropy when not explicitly provided. } deriving stock Show data RunOpts = RunOpts { batchCount :: !Int , batchSize :: !Int } deriving stock Show data Cmd -- | Setup benchmark: generate initial LSM trees etc. = CmdSetup -- | Run collision analysis. | CmdCollisions -- | Run the actual benchmark | CmdRun RunOpts deriving stock Show ------------------------------------------------------------------------------- -- command line interface ------------------------------------------------------------------------------- globalOptsP :: O.Parser GlobalOpts globalOptsP = pure GlobalOpts <*> O.option O.str (O.long "bench-dir" <> O.value defaultBenchDir <> O.showDefault <> O.help "Benchmark directory to put files in") <*> O.option (positiveParser "number of tables") (O.long "table-count" <> O.value defaultTableCount <> O.showDefault <> O.help "Number of tables to benchmark") <*> O.option (positiveParser "size of tables") (O.long "initial-size" <> O.value defaultInitialSize <> O.showDefault <> O.help "Initial size of each table") <*> O.option O.auto (O.long "seed" <> O.value defaultSeed <> O.showDefault <> O.help "Random seed, defaults to the first 20 digits of Pi") where positiveParser str = let validator v | v >= 1 = Right v | otherwise = Left $ unwords [ "Non-positive number for", str <> ":", show v ] in O.eitherReader $ Read.readEither >=> validator cmdP :: O.Parser Cmd cmdP = O.subparser $ mconcat [ O.command "setup" $ O.info (CmdSetup <$ O.helper) (O.progDesc "Setup benchmark by generating required tables") , O.command "collisions" $ O.info (CmdCollisions <$ O.helper) (O.progDesc "Collision analysis, compute shared keys between tables") , O.command "run" $ O.info (CmdRun <$> runOptsP <**> O.helper) (O.progDesc "Proper run, measuring performance and generating a benchmark report") ] runOptsP :: O.Parser RunOpts runOptsP = pure RunOpts <*> O.option O.auto (O.long "batch-count" <> O.value 200 <> O.showDefault <> O.help "Batch count") <*> O.option O.auto (O.long "batch-size" <> O.value 256 <> O.showDefault <> O.help "Batch size") ------------------------------------------------------------------------------- -- measurements ------------------------------------------------------------------------------- -- | Returns number of seconds elapsed timed :: IO a -> IO (a, Double) timed action = do performMajorGC t1 <- Clock.getTime Clock.Monotonic x <- action t2 <- Clock.getTime Clock.Monotonic performMajorGC let !t = fromIntegral (Clock.toNanoSecs (Clock.diffTimeSpec t2 t1)) * 1e-9 pure (x, t) -- | Returns number of seconds elapsed timed_ :: IO () -> IO Double timed_ action = do ((), t) <- timed action pure t ------------------------------------------------------------------------------- -- Setup ------------------------------------------------------------------------------- doSetup :: GlobalOpts -> IO () doSetup gopts = do -- Define some constants let populationBatchSize = 256 entryCount = initialSize gopts -- The key size is twice the specified size because we will delete half -- of the keys in the domain of each table uniformly at random. keyMax = 2 * entryCount - 1 keyMin = 0 tableIDs = tableRange gopts -- Setup RNG let tableRNGs = deriveSetupRNGs gopts $ length tableIDs -- Ensure that our mount point exists on the real file system createDirectoryIfMissing True $ rootDir gopts -- Populate the specified number of tables LSM.withOpenSession (rootDir gopts) $ \session -> do -- Create a "baseline" table -- -- We create a single table that *already has* all the same key value pairs -- which exist in the union of all tables *which are going* to be unioned. -- This way we can compare performance of the union of tables to a -- "baseline" table since both share all the same key value pairs. table_0 <- LSM.newTable @K @V session forConcurrently_ (NE.zip tableIDs tableRNGs) $ \(tID, tRNG) -> do -- Create a new table table_n <- LSM.newTable @K @V session -- Populate the table in batches forM_ (groupsOfN (populationBatchSize * 2) [ keyMin .. keyMax ]) $ \batch -> do let prunedBatch = Random.sampleUniformWithoutReplacement tRNG (NE.length batch `div` 2) $ NE.toList batch let keyInserts = V.fromList [ (makeKey (fromIntegral k), theValue) | k <- prunedBatch ] -- Insert the batch of the randomly selected keys -- into both the baseline table (0) and the current table LSM.inserts table_0 keyInserts LSM.inserts table_n keyInserts LSM.saveSnapshot (makeTableName tID) label table_n -- Finally, save the baseline table LSM.saveSnapshot baselineTableName label table_0 makeTableName :: Show a => a -> LSM.SnapshotName makeTableName n = LSM.toSnapshotName $ "bench_" <> show n tableRange :: GlobalOpts -> NonEmpty Int tableRange gopts = let n1 = succ baselineTableID in NE.fromList [ n1 .. tableCount gopts + baselineTableID ] ------------------------------------------------------------------------------- -- Collision analysis ------------------------------------------------------------------------------- -- | Count duplicate keys in all tables that will be unioned together doCollisionAnalysis :: GlobalOpts -> IO () doCollisionAnalysis gopts = do LSM.withOpenSession (rootDir gopts) $ \session -> do seenRef <- newIORef Set.empty dupRef <- newIORef Set.empty forM_ (tableRange gopts) $ \tID -> do let name = makeTableName tID LSM.withTableFromSnapshot session name label $ \(table :: LSM.Table K V) -> do LSM.withCursor table $ \cursor -> do streamCursor cursor $ \(k, _) -> do seen <- readIORef seenRef if Set.member k seen then modifyIORef dupRef $ Set.insert k else modifyIORef seenRef $ Set.insert k seen <- readIORef seenRef dups <- readIORef dupRef printf "Keys seen at least once: %d\n" $ Set.size seen printf "Keys seen at least twice: %d\n" $ Set.size dups streamCursor :: LSM.Cursor K V -> ((K, V) -> IO ()) -> IO () streamCursor cursor f = go where go = LSM.next cursor >>= \case Nothing -> pure () Just kv -> f kv >> go ------------------------------------------------------------------------------- -- run ------------------------------------------------------------------------------- doRun :: GlobalOpts -> RunOpts -> IO () doRun gopts opts = do -- Perform 3 measurement phases -- * Phase 1: Measure performance before supplying any credits. -- * Phase 2: Measure performance as credits are incrementally supplied and debt is repaid. -- * Phase 3: Measure performance when debt is 0. let rng = deriveRunRNG gopts dataPath = "bench-unions/unions-bench.dat" withFile dataPath WriteMode $ \h -> do hPutStrLn h "# iteration \t baseline (ops/sec) \t union (ops/sec) \t union debt" LSM.withOpenSession (rootDir gopts) $ \session -> do -- Load the baseline table LSM.withTableFromSnapshot session baselineTableName label $ \baselineTable -> do -- Load the union tables withTablesFromSnapshots session label (makeTableName <$> tableRange gopts) $ \inputTables -> do -- Start the incremental union LSM.withIncrementalUnions inputTables $ \unionedTable -> do let measurePerformance :: Int -> Maybe LSM.UnionCredits -> IO () measurePerformance iteration mayCredits = do LSM.supplyUnionCredits unionedTable `traverse_` mayCredits LSM.UnionDebt currDebt <- LSM.remainingUnionDebt unionedTable baselineOpsSec <- timeOpsPerSecond gopts opts baselineTable rng unionOpsSec <- timeOpsPerSecond gopts opts unionedTable rng printf "iteration: %d, baseline: %7.01f ops/sec, union: %7.01f ops/sec, debt: %d\n" iteration baselineOpsSec unionOpsSec currDebt hPutStrLn h $ unwords [ show iteration, show baselineOpsSec , show unionOpsSec, show currDebt ] LSM.UnionDebt totalDebt <- LSM.remainingUnionDebt unionedTable -- Phase 1 measurements: Debt = 100% forM_ [0..50-1] $ \step -> do measurePerformance step Nothing -- Phase 2 measurements: Debt ∈ [0%, 99%] forM_ [50..150-1] $ \step -> do let creditsPerIteration = LSM.UnionCredits ((totalDebt + 99) `div` 100) measurePerformance step (Just creditsPerIteration) -- Phase 3 measurements: Debt = 0% forM_ [150..200-1] $ \step -> do measurePerformance step Nothing -- | Exception-safe opening of multiple snapshots withTablesFromSnapshots :: LSM.Session -> LSM.SnapshotLabel -> (NonEmpty LSM.SnapshotName) -> (NonEmpty (LSM.Table K V) -> IO a) -> IO a withTablesFromSnapshots session snapLabel (x0 :| xs0) k = do LSM.withTableFromSnapshot session x0 snapLabel $ \t0 -> go' (NE.singleton t0) xs0 where go' acc [] = k (NE.reverse acc) go' acc (x:xs) = LSM.withTableFromSnapshot session x snapLabel $ \t -> go' (t NE.<| acc) xs -- | Returns operations per second timeOpsPerSecond :: GlobalOpts -> RunOpts -> LSM.Table K V -> StdGen -> IO Double timeOpsPerSecond gopts opts table g = do t <- timed_ $ sequentialIterations (initialSize gopts) (batchSize opts) (batchCount opts) table g let ops = batchCount opts * batchSize opts opsPerSec = fromIntegral ops / t pure opsPerSec ------------------------------------------------------------------------------- -- PRNG initialisation ------------------------------------------------------------------------------- deriveSetupRNGs :: GlobalOpts -> Int -> NonEmpty Random.StdGen deriveSetupRNGs gOpts amount = let -- g1 is reserved for the run command (_g1, !g2) = deriveInitialForkedRNGs gOpts in NE.fromList $ take amount $ List.unfoldr (Just . Random.splitGen) g2 deriveRunRNG :: GlobalOpts -> Random.StdGen deriveRunRNG gOpts = let -- g2 and its splits are reserved for the setup command (!g1, _g2) = deriveInitialForkedRNGs gOpts in g1 deriveInitialForkedRNGs :: GlobalOpts -> (Random.StdGen, Random.StdGen) deriveInitialForkedRNGs = Random.splitGen . Random.mkStdGen . seed ------------------------------------------------------------------------------- -- Batch generation ------------------------------------------------------------------------------- generateBatch :: Int -- ^ initial size of the table -> Int -- ^ batch size -> StdGen -> (StdGen, V.Vector K) generateBatch initialSize batchSize g = (g', V.map makeKey lookups) where (!g', lookups) = generateBatch' initialSize batchSize g {-# INLINE generateBatch' #-} generateBatch' :: Int -- ^ initial size of the table -> Int -- ^ batch size -> StdGen -> (StdGen, V.Vector Word64) generateBatch' initialSize batchSize g = (g', lookups) where randomKey :: StdGen -> (Word64, StdGen) randomKey = Random.uniformR (0, 2 * fromIntegral initialSize - 1) lookups :: V.Vector Word64 (lookups, !g') = runState (V.replicateM batchSize (state randomKey)) g ------------------------------------------------------------------------------- -- sequential ------------------------------------------------------------------------------- {-# INLINE sequentialIteration #-} sequentialIteration :: Int -- ^ initial size of the table -> Int -- ^ batch size -> LSM.Table K V -> StdGen -> IO StdGen sequentialIteration !initialSize !batchSize !tbl !g = do let (!g', ls) = generateBatch initialSize batchSize g -- lookups !_ <- LSM.lookups tbl ls -- continue to the next batch pure g' sequentialIterations :: Int -- ^ initial size of the table -> Int -- ^ batch size -> Int -- ^ batch count -> LSM.Table K V -> StdGen -> IO () sequentialIterations !initialSize !batchSize !batchCount !tbl !g0 = do void $ forFoldM_ g0 [ 0 .. batchCount - 1 ] $ \_b g -> sequentialIteration initialSize batchSize tbl g ------------------------------------------------------------------------------- -- main ------------------------------------------------------------------------------- main :: IO () main = do hSetBuffering stdout NoBuffering #ifdef NO_IGNORE_ASSERTS putStrLn "WARNING: Benchmarking in debug mode." putStrLn " To benchmark in release mode, pass:" putStrLn " --project-file=cabal.project.release" #endif (gopts, cmd) <- O.customExecParser prefs cliP print gopts print cmd case cmd of CmdCollisions -> doCollisionAnalysis gopts CmdRun opts -> doRun gopts opts CmdSetup -> doSetup gopts where cliP = O.info ((,) <$> globalOptsP <*> cmdP <**> O.helper) O.fullDesc prefs = O.prefs $ O.showHelpOnEmpty <> O.helpShowGlobals <> O.subparserInline ------------------------------------------------------------------------------- -- general utils ------------------------------------------------------------------------------- forFoldM_ :: Monad m => s -> [a] -> (a -> s -> m s) -> m s forFoldM_ !s0 xs0 f = go s0 xs0 where go !s [] = pure s go !s (x:xs) = do !s' <- f x s go s' xs