{-# LANGUAGE ScopedTypeVariables, BangPatterns, TypeSynonymInstances, UndecidableInstances #-} -- | An experimental monadic interface to Tree mutation. The main idea is to -- simulate IO-ish manipulation of real filesystem (that's the state part of -- the monad), and to keep memory usage down by reasonably often dumping the -- intermediate data to disk and forgetting it. The monad interface itself is -- generic, and a number of actual implementations can be used. This module -- provides just 'virtualTreeIO' that never writes any changes, but may trigger -- filesystem reads as appropriate. module Storage.Hashed.Monad ( virtualTreeIO, virtualTreeMonad , readFile, writeFile, createDirectory, rename, unlink , fileExists, directoryExists, exists, withDirectory , currentDirectory , tree, TreeState, TreeMonad, TreeIO, runTreeMonad , PathSet, initialState, replaceItem ) where import Prelude hiding ( readFile, writeFile ) import Storage.Hashed.AnchoredPath import Storage.Hashed.Tree import Storage.Hashed.Hash import Control.Monad.Error( catchError, MonadError ) import Data.List( inits ) import Data.Int( Int64 ) import Data.Maybe( isNothing, isJust ) import qualified Data.ByteString.Lazy.Char8 as BL import Control.Monad.RWS.Strict import qualified Data.Set as S type PathSet = S.Set AnchoredPath -- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree -- content, unsync'd changes and a current working directory (of the monad). data TreeState m = TreeState { tree :: !(Tree m) , changed :: !PathSet , changesize :: !Int64 , sync :: PathSet -> TreeMonad m () } -- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well, -- which is a sort of virtual filesystem. Depending on how you obtained your -- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the -- actual real filesystem. For 'virtualTreeIO', nothing happens in real -- filesystem, however with 'plainTreeIO', the plain tree will be updated every -- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get -- updated. type TreeMonad m = RWST AnchoredPath () (TreeState m) m type TreeIO = TreeMonad IO class (Functor m, Monad m) => TreeRO m where currentDirectory :: m AnchoredPath withDirectory :: (MonadError e m) => AnchoredPath -> m a -> m a expandTo :: (MonadError e m) => AnchoredPath -> m () -- | Grab content of a file in the current Tree at the given path. readFile :: (MonadError e m) => AnchoredPath -> m BL.ByteString -- | Check for existence of a node (file or directory, doesn't matter). exists :: (MonadError e m) => AnchoredPath -> m Bool -- | Check for existence of a directory. directoryExists :: (MonadError e m) => AnchoredPath -> m Bool -- | Check for existence of a file. fileExists :: (MonadError e m) => AnchoredPath -> m Bool class TreeRO m => TreeRW m where -- | Change content of a file at a given path. The change will be -- eventually flushed to disk, but might be buffered for some time. writeFile :: (MonadError e m) => AnchoredPath -> BL.ByteString -> m () createDirectory :: (MonadError e m) => AnchoredPath -> m () unlink :: (MonadError e m) => AnchoredPath -> m () rename :: (MonadError e m) => AnchoredPath -> AnchoredPath -> m () initialState :: Tree m -> (PathSet -> TreeMonad m ()) -> TreeState m initialState t s = TreeState { tree = t , changed = S.empty , changesize = 0 , sync = s } flush :: (Monad m) => TreeMonad m () flush = do current <- get modify $ \st -> st { changed = S.empty, changesize = 0 } sync current (changed current) runTreeMonad :: (Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m) runTreeMonad action initial = do let action' = do x <- action flush return x (out, final, _) <- runRWST action' (AnchoredPath []) initial return (out, tree final) -- | Run a TreeIO action without storing any changes. This is useful for -- running monadic tree mutations for obtaining the resulting Tree (as opposed -- to their effect of writing a modified tree to disk). The actions can do both -- read and write -- reads are passed through to the actual filesystem, but the -- writes are held in memory in a form of modified Tree. virtualTreeMonad :: (Monad m) => TreeMonad m a -> Tree m -> m (a, Tree m) virtualTreeMonad action t = runTreeMonad action $ initialState t (\_ -> return ()) virtualTreeIO :: TreeIO a -> Tree IO -> IO (a, Tree IO) virtualTreeIO = virtualTreeMonad -- | Modifies an item in the current Tree. This action keeps an account of the -- modified data, in changed and changesize, for subsequent flush -- operations. Any modifications (as in "modifyTree") are allowed. modifyItem :: (MonadError e m, Functor m, Monad m) => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m () modifyItem path item = do path' <- (`catPaths` path) `fmap` currentDirectory let paths = let (AnchoredPath x) = path' in S.fromList $ map AnchoredPath $ inits x change <- changedSize path' item modify $ \st -> st { tree = modifyTree (tree st) path' item , changed = (S.union paths (changed st)) , changesize = (changesize st + change) } -- | Replace an item with a new version without modifying the content of the -- tree. This does not do any change tracking. Ought to be only used from a -- 'sync' implementation for a particular storage format. The presumed use-case -- is that an existing in-memory Blob is replaced with a one referring to an -- on-disk file. replaceItem :: (MonadError e m, Functor m, Monad m) => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m () replaceItem path item = do path' <- (`catPaths` path) `fmap` currentDirectory modify $ \st -> st { tree = modifyTree (tree st) path' item } changedSize :: (MonadError e m, Functor m, Monad m) => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m Int64 changedSize path item = do x <- get let ch = S.member path (changed x) size (Just (File b)) = lift (BL.length `fmap` readBlob b) size _ = return 0 oldsize <- size $ find (tree x) path newsize <- size item return $! (if ch then newsize - oldsize else newsize) -- | If buffers are becoming large, sync, otherwise do nothing. maybeFlush :: (Monad m) => TreeMonad m () maybeFlush = do x <- gets changesize when (x > 100 * 1024 * 1024) $ flush instance (Monad m, MonadError e m) => TreeRO (TreeMonad m) where expandTo p = do t <- gets tree case find t p of Nothing -> do t' <- lift $ expandPath t p `catchError` \_ -> return t modify $ \st -> st { tree = t' } _ -> return () fileExists p = do expandTo p (isJust . (flip findFile p)) `fmap` gets tree directoryExists p = do expandTo p (isJust . (flip findTree p)) `fmap` gets tree exists p = do expandTo p (isJust . (flip find p)) `fmap` gets tree readFile p = do expandTo p t <- gets tree let f = findFile t p case f of Nothing -> fail $ "No such file " ++ show p Just x -> lift (readBlob x) currentDirectory = ask withDirectory dir = local (\old -> old `catPaths` dir) instance (Functor m, Monad m, MonadError e m) => TreeRW (TreeMonad m) where writeFile p con = do expandTo p modifyItem p (Just blob) maybeFlush where blob = File $ Blob (return con) hash hash = NoHash -- we would like to say "sha256 con" here, but due -- to strictness of Hash in Blob, this would often -- lead to unnecessary computation which would then -- be discarded anyway; we rely on the sync -- implementation to fix up any NoHash occurrences createDirectory p = do expandTo p modifyItem p $ Just $ SubTree emptyTree unlink p = do expandTo p modifyItem p Nothing rename from to = do expandTo from tr <- gets tree let item = find tr from found_to = find tr to unless (isNothing found_to) $ fail $ "Error renaming: destination " ++ show to ++ " exists." unless (isNothing item) $ do modifyItem to item modifyItem from Nothing