-- | Utilities for generating collections of key\/value pairs, and creating runs -- from them. Tests and benchmarks should preferably use these utilities instead -- of (re-)defining their own. module Database.LSMTree.Extras.RunData ( -- * Create runs withRun , withRunAt , withRuns , unsafeCreateRun , unsafeCreateRunAt , simplePath , simplePaths -- * Serialise write buffers , withRunDataAsWriteBuffer , withSerialisedWriteBuffer -- * RunData , RunData (..) , mapRunData , SerialisedRunData , serialiseRunData -- * NonEmptyRunData , NonEmptyRunData (..) , nonEmptyRunData , toRunData , mapNonEmptyRunData , SerialisedNonEmptyRunData -- * QuickCheck , labelRunData , labelNonEmptyRunData , genRunData , shrinkRunData , genNonEmptyRunData , shrinkNonEmptyRunData , liftArbitrary2Map , liftShrink2Map ) where import Control.Exception (bracket, bracket_) import Control.RefCount import Data.Bifoldable (Bifoldable (bifoldMap)) import Data.Bifunctor import Data.Foldable (for_) import Data.Map.NonEmpty (NEMap) import qualified Data.Map.NonEmpty as NEMap import Data.Map.Strict (Map) import qualified Data.Map.Strict as Map import qualified Data.Vector as V import Database.LSMTree.Extras (showPowersOf10) import Database.LSMTree.Extras.Generators () import qualified Database.LSMTree.Internal.BloomFilter as Bloom import Database.LSMTree.Internal.Entry import Database.LSMTree.Internal.MergeSchedule (addWriteBufferEntries) import Database.LSMTree.Internal.Paths import qualified Database.LSMTree.Internal.Paths as Paths import Database.LSMTree.Internal.Run (Run, RunParams (..)) import qualified Database.LSMTree.Internal.Run as Run import Database.LSMTree.Internal.RunAcc (entryWouldFitInPage) import Database.LSMTree.Internal.RunNumber import Database.LSMTree.Internal.Serialise import Database.LSMTree.Internal.UniqCounter import qualified Database.LSMTree.Internal.WriteBuffer as WB import qualified Database.LSMTree.Internal.WriteBufferBlobs as WBB import Database.LSMTree.Internal.WriteBufferWriter (writeWriteBuffer) import qualified System.FS.API as FS import System.FS.API (HasFS) import qualified System.FS.BlockIO.API as FS import System.FS.BlockIO.API (HasBlockIO) import Test.QuickCheck {------------------------------------------------------------------------------- Create runs -------------------------------------------------------------------------------} -- | Create a temporary 'Run' using 'unsafeCreateRun'. withRun :: HasFS IO h -> HasBlockIO IO h -> RefCtx -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedRunData -> (Ref (Run IO h) -> IO a) -> IO a withRun hfs hbio refCtx salt runParams path counter rd = do bracket (unsafeCreateRun hfs hbio refCtx salt runParams path counter rd) releaseRef -- | Create a temporary 'Run' using 'unsafeCreateRunAt'. withRunAt :: HasFS IO h -> HasBlockIO IO h -> RefCtx -> Bloom.Salt -> RunParams -> RunFsPaths -> SerialisedRunData -> (Ref (Run IO h) -> IO a) -> IO a withRunAt hfs hbio refCtx salt runParams path rd = do bracket (unsafeCreateRunAt hfs hbio refCtx salt runParams path rd) releaseRef {-# INLINABLE withRuns #-} -- | Create temporary 'Run's using 'unsafeCreateRun'. withRuns :: HasFS IO h -> HasBlockIO IO h -> RefCtx -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> [SerialisedRunData] -> ([Ref (Run IO h)] -> IO a) -> IO a withRuns hfs hbio refCtx salt runParams path counter = go where go [] act = act [] go (rd:rds) act = withRun hfs hbio refCtx salt runParams path counter rd $ \r -> go rds $ \rs -> act (r:rs) -- | Like 'unsafeCreateRunAt', but uses a 'UniqCounter' to determine -- the 'RunFsPaths', at a base file path. unsafeCreateRun :: HasFS IO h -> HasBlockIO IO h -> RefCtx -> Bloom.Salt -> RunParams -> FS.FsPath -> UniqCounter IO -> SerialisedRunData -> IO (Ref (Run IO h)) unsafeCreateRun fs hbio refCtx salt runParams path counter rd = do n <- incrUniqCounter counter let fsPaths = RunFsPaths path (uniqueToRunNumber n) unsafeCreateRunAt fs hbio refCtx salt runParams fsPaths rd -- | Flush serialised run data to disk as if it were a write buffer. -- -- This might leak resources if not run with asynchronous exceptions masked. -- Use helper functions like 'withRun' or 'withRuns' instead. -- -- Use of this function should be paired with a 'releaseRef'. unsafeCreateRunAt :: HasFS IO h -> HasBlockIO IO h -> RefCtx -> Bloom.Salt -> RunParams -> RunFsPaths -> SerialisedRunData -> IO (Ref (Run IO h)) unsafeCreateRunAt fs hbio refCtx salt runParams fsPaths (RunData m) = do -- the WBB file path doesn't have to be at a specific place relative to -- the run we want to create, but fsPaths should already point to a unique -- location, so we just append something to not conflict with that. let blobpath = FS.addExtension (runBlobPath fsPaths) ".wb" bracket (WBB.new fs refCtx blobpath) releaseRef $ \wbblobs -> do wb <- WB.fromMap <$> traverse (traverse (WBB.addBlob fs wbblobs)) m Run.fromWriteBuffer fs hbio refCtx salt runParams fsPaths wb wbblobs -- | Create a 'RunFsPaths' using an empty 'FsPath'. The empty path corresponds -- to the "root" or "mount point" of a 'HasFS' instance. simplePath :: Int -> RunFsPaths simplePath n = RunFsPaths (FS.mkFsPath []) (RunNumber n) -- | Like 'simplePath', but for a list. simplePaths :: [Int] -> [RunFsPaths] simplePaths ns = fmap simplePath ns {------------------------------------------------------------------------------- Serialise write buffers -------------------------------------------------------------------------------} -- | Use 'SerialisedRunData' to 'WriteBuffer' and 'WriteBufferBlobs'. withRunDataAsWriteBuffer :: FS.HasFS IO h -> RefCtx -> ResolveSerialisedValue -> WriteBufferFsPaths -> SerialisedRunData -> (WB.WriteBuffer -> Ref (WBB.WriteBufferBlobs IO h) -> IO a) -> IO a withRunDataAsWriteBuffer hfs refCtx f fsPaths rd action = do let es = V.fromList . Map.toList $ unRunData rd let maxn = NumEntries $ V.length es let wbbPath = Paths.writeBufferBlobPath fsPaths bracket (WBB.new hfs refCtx wbbPath) releaseRef $ \wbb -> do (wb, _) <- addWriteBufferEntries hfs f wbb maxn WB.empty es action wb wbb -- | Serialise a 'WriteBuffer' and 'WriteBufferBlobs' to disk and perform an -- 'IO' action. withSerialisedWriteBuffer :: FS.HasFS IO h -> FS.HasBlockIO IO h -> WriteBufferFsPaths -> WB.WriteBuffer -> Ref (WBB.WriteBufferBlobs IO h) -> IO a -> IO a withSerialisedWriteBuffer hfs hbio wbPaths wb wbb = bracket_ (writeWriteBuffer hfs hbio wbPaths wb wbb) $ do for_ [ Paths.writeBufferKOpsPath wbPaths , Paths.writeBufferBlobPath wbPaths , Paths.writeBufferChecksumsPath wbPaths ] $ FS.removeFile hfs {------------------------------------------------------------------------------- RunData -------------------------------------------------------------------------------} -- | A collection of arbitrary key\/value pairs that are suitable for creating -- 'Run's. -- -- Note: 'b ~ Void' should rule out blobs. newtype RunData k v b = RunData { unRunData :: Map k (Entry v b) } deriving stock (Eq, Show) mapRunData :: Ord k' => (k -> k') -> (v -> v') -> (b -> b') -> RunData k v b -> RunData k' v' b' mapRunData f g h = RunData . Map.mapKeys f . Map.map (bimap g h) . unRunData type SerialisedRunData = RunData SerialisedKey SerialisedValue SerialisedBlob serialiseRunData :: (SerialiseKey k, SerialiseValue v, SerialiseValue b) => RunData k v b -> SerialisedRunData serialiseRunData = mapRunData serialiseKey serialiseValue serialiseBlob {------------------------------------------------------------------------------- NonEmptyRunData -------------------------------------------------------------------------------} -- | A collection of arbitrary key\/value pairs that are suitable for creating -- 'Run's. -- -- Note: 'b ~ Void' should rule out blobs. newtype NonEmptyRunData k v b = NonEmptyRunData { unNonEmptyRunData :: NEMap k (Entry v b) } deriving stock (Eq, Show) nonEmptyRunData :: RunData k v b -> Maybe (NonEmptyRunData k v b) nonEmptyRunData (RunData m) = NonEmptyRunData <$> NEMap.nonEmptyMap m toRunData :: NonEmptyRunData k v b -> RunData k v b toRunData (NonEmptyRunData m) = RunData (NEMap.toMap m) mapNonEmptyRunData :: Ord k' => (k -> k') -> (v -> v') -> (b -> b') -> NonEmptyRunData k v b -> NonEmptyRunData k' v' b' mapNonEmptyRunData f g h = NonEmptyRunData . NEMap.mapKeys f . NEMap.map (bimap g h) . unNonEmptyRunData type SerialisedNonEmptyRunData = NonEmptyRunData SerialisedKey SerialisedValue SerialisedBlob {------------------------------------------------------------------------------- QuickCheck -------------------------------------------------------------------------------} labelRunData :: SerialisedRunData -> Property -> Property labelRunData (RunData m) = tabulate "value size" valSizes . tabulate "run length" [runLength] -- We use tabulate here, not label. Otherwise, if multiple RunDatas get -- labelled in a test case, each leads to a separate entry in the -- displayed statistics, which isn't that useful. . tabulate "run has large k/ops" [note] where kops = Map.toList m valSizes = map (showPowersOf10 . sizeofValue) vals runLength = showPowersOf10 (Map.size m) vals = concatMap (bifoldMap pure mempty . snd) kops note | any (not . uncurry entryWouldFitInPage) kops = "has large k/op" | otherwise = "no large k/op" labelNonEmptyRunData :: SerialisedNonEmptyRunData -> Property -> Property labelNonEmptyRunData = labelRunData . toRunData instance ( Ord k, Arbitrary k, Arbitrary v, Arbitrary b ) => Arbitrary (RunData k v b) where arbitrary = genRunData arbitrary arbitrary arbitrary shrink = shrinkRunData shrink shrink shrink instance ( Ord k, Arbitrary k, Arbitrary v, Arbitrary b ) => Arbitrary (NonEmptyRunData k v b) where arbitrary = genNonEmptyRunData arbitrary arbitrary arbitrary shrink = shrinkNonEmptyRunData shrink shrink shrink genRunData :: Ord k => Gen k -> Gen v -> Gen b -> Gen (RunData k v b) genRunData genKey genVal genBlob = RunData <$> liftArbitrary2Map genKey (liftArbitrary2 genVal genBlob) shrinkRunData :: Ord k => (k -> [k]) -> (v -> [v]) -> (b -> [b]) -> RunData k v b -> [RunData k v b] shrinkRunData shrinkKey shrinkVal shrinkBlob = fmap RunData . liftShrink2Map shrinkKey (liftShrink2 shrinkVal shrinkBlob) . unRunData -- | We cannot implement 'Arbitrary2' since we have constraints on @k@. liftArbitrary2Map :: Ord k => Gen k -> Gen v -> Gen (Map k v) liftArbitrary2Map genk genv = Map.fromList <$> liftArbitrary (liftArbitrary2 genk genv) -- | We cannot implement 'Arbitrary2' since we have constraints @k@. liftShrink2Map :: Ord k => (k -> [k]) -> (v -> [v]) -> Map k v -> [Map k v] liftShrink2Map shrinkk shrinkv m = Map.fromList <$> liftShrink (liftShrink2 shrinkk shrinkv) (Map.toList m) genNonEmptyRunData :: Ord k => Gen k -> Gen v -> Gen b -> Gen (NonEmptyRunData k v b) genNonEmptyRunData genKey genVal genBlob = NonEmptyRunData <$> liftArbitrary2NEMap genKey (liftArbitrary2 genVal genBlob) shrinkNonEmptyRunData :: Ord k => (k -> [k]) -> (v -> [v]) -> (b -> [b]) -> NonEmptyRunData k v b -> [NonEmptyRunData k v b] shrinkNonEmptyRunData shrinkKey shrinkVal shrinkBlob = fmap NonEmptyRunData . liftShrink2NEMap shrinkKey (liftShrink2 shrinkVal shrinkBlob) . unNonEmptyRunData -- | We cannot implement 'Arbitrary2' since we have constraints on @k@. liftArbitrary2NEMap :: Ord k => Gen k -> Gen v -> Gen (NEMap k v) liftArbitrary2NEMap genk genv = NEMap.fromList <$> liftArbitrary (liftArbitrary2 genk genv) -- | We cannot implement 'Arbitrary2' since we have constraints @k@. liftShrink2NEMap :: Ord k => (k -> [k]) -> (v -> [v]) -> NEMap k v -> [NEMap k v] liftShrink2NEMap shrinkk shrinkv m = NEMap.fromList <$> liftShrink (liftShrink2 shrinkk shrinkv) (NEMap.toList m)