{-# LANGUAGE ScopedTypeVariables, BangPatterns #-} -- | An experimental monadic interface to Tree mutation. The main idea is to -- simulate IO-ish manipulation of real filesystem (that's the state part of -- the monad), and to keep memory usage down by reasonably often dumping the -- intermediate data to disk and forgetting it. XXX This currently does not -- work as advertised and the monads leak memory. So far, I'm at a loss why -- this happens. module Storage.Hashed.Monad ( hashedTreeIO, plainTreeIO, virtualTreeIO , readFile, writeFile, createDirectory, rename, unlink , fileExists, exists , tree, cwd, TreeState, TreeIO ) where import Prelude hiding ( read, catch, readFile, writeFile ) import Storage.Hashed.AnchoredPath import Storage.Hashed.Tree import Storage.Hashed.Utils import Storage.Hashed.Darcs import System.Directory( createDirectoryIfMissing, doesFileExist ) import System.FilePath( () ) import Data.List( inits ) import Data.Int( Int64 ) import Data.Maybe( isNothing, isJust ) import Codec.Compression.GZip( decompress, compress ) import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.ByteString.Char8 as BS import Control.Monad.State.Strict import qualified Data.Set as S -- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree -- content, unsync'd changes and a current working directory (of the monad). data TreeState = TreeState { cwd :: AnchoredPath , tree :: Tree , changed :: S.Set AnchoredPath , changesize :: Int64 , sync :: TreeIO () } -- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well, -- which is a sort of virtual filesystem. Depending on how you obtained your -- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the -- actual real filesystem. For 'virtualTreeIO', nothing happens in real -- filesystem, however with 'plainTreeIO', the plain tree will be updated every -- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get -- updated. type TreeIO = StateT TreeState IO initialState :: Tree -> TreeIO () -> TreeState initialState t s = TreeState { cwd = AnchoredPath [] , tree = t , changed = S.empty , changesize = 0 , sync = s } runTreeIO :: TreeIO a -> TreeState -> IO (a, Tree) runTreeIO action initial = do (out, final) <- runStateT (do x <- action get >>= sync return x) initial return (out, tree final) -- | Run a TreeIO action without dumping anything to disk. Useful for running -- tree mutations just for the purpose of getting the resulting Tree and -- throwing it away. virtualTreeIO :: TreeIO a -> Tree -> IO (a, Tree) virtualTreeIO action t = runTreeIO action $ initialState t (return ()) -- | Create a hashed file from a 'FilePath' and content. In case the file exists -- it is kept untouched and is assumed to have the right content. XXX Corrupt -- files should be probably renamed out of the way automatically or something -- (probably when they are being read though). fsCreateHashedFile :: FilePath -> BL.ByteString -> TreeIO () fsCreateHashedFile fn content = liftIO $ do exist <- doesFileExist fn unless exist $ BL.writeFile fn content replaceItemAbs :: AnchoredPath -> Maybe TreeItem -> TreeIO () replaceItemAbs path item = modify $ \st -> st { tree = modifyTree (tree st) path item } replaceItem :: AnchoredPath -> Maybe TreeItem -> TreeIO () replaceItem path item = modify $ \st -> st { tree = modifyTree (tree st) (cwd st `catPaths` path) item } expandTo :: AnchoredPath -> TreeIO () expandTo p = do t <- gets tree t' <- liftIO $ expandPath t p modify $ \st -> st { tree = t' } -- | Run a 'TreeIO' @action@ in a hashed setting. The @initial@ tree is assumed -- to be fully available from the @directory@, and any changes will be written -- out to same. Please note that actual filesystem files are never removed. -- -- XXX This somehow manages to leak memory, somewhere. hashedTreeIO :: TreeIO a -- ^ action -> Tree -- ^ initial -> FilePath -- ^ directory -> IO (a, Tree) hashedTreeIO action t dir = do runTreeIO action $ initialState t syncHashed where syncHashed = do ch <- gets changed modify $ \st -> st { changed = S.empty } forM_ (reverse $ S.toList ch) $ \c -> do let path = anchorPath "" c current <- gets tree case find current c of Just (File b) -> updateFile c b Just (SubTree s) -> updateSub c s _ -> fail $ "Bar at " ++ path updateFile path b@(Blob _ (Just !h)) = do let fn = dir BS.unpack (darcsFormatHash h) nblob = File $ Blob (decompress `fmap` BL.readFile fn) (Just h) newcontent <- liftIO $ compress `fmap` read b fsCreateHashedFile fn newcontent replaceItemAbs path (Just nblob) updateFile path b@(Blob _ Nothing) = do content <- liftIO $ read b let h = hashSetSize (sha256 content) (BL.length content) fn = dir BS.unpack (darcsFormatHash h) nblob = File $ Blob (decompress `fmap` BL.readFile fn) (Just h) newcontent = compress content fsCreateHashedFile fn newcontent replaceItemAbs path (Just nblob) updateSub path s = do let !hash = darcsTreeHash s dirdata = darcsFormatDir s fn = dir BS.unpack (darcsFormatHash $ hash) ns = SubTree (s { treeHash = Just hash }) fsCreateHashedFile fn (compress dirdata) replaceItemAbs path (Just ns) -- | Run a 'TreeIO' action in a plain tree setting. Writes out changes to the -- plain tree every now and then (after the action is finished, the last tree -- state is always flushed to disk). XXX Modify the tree with filesystem -- reading and put it back into st (ie. replace the in-memory Blobs with normal -- ones, so the memory can be GCd). plainTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree) plainTreeIO action t dir = runTreeIO action $ initialState t syncPlain where syncPlain = do ch <- gets changed modify $ \st -> st { changed = S.empty } current <- gets tree forM_ (S.toList ch) $ \c -> do let path = anchorPath dir c case find current c of Just (File b) -> do liftIO $ read b >>= BL.writeFile path let nblob = File $ Blob (BL.readFile path) Nothing modify $ \st -> st { tree = modifyTree (tree st) c (Just nblob) } Just (SubTree _) -> liftIO $ createDirectoryIfMissing False path _ -> fail $ "Foo at " ++ path -- | Check for existence of a file. fileExists :: AnchoredPath -> TreeIO Bool fileExists p = do expandTo p (isJust . (flip findFile p)) `fmap` gets tree -- | Check for existence of a node (file or directory, doesn't matter). exists :: AnchoredPath -> TreeIO Bool exists p = do expandTo p (isJust . (flip find p)) `fmap` gets tree -- | Grab content of a file in the current Tree at the given path. readFile :: AnchoredPath -> TreeIO BL.ByteString readFile p = do expandTo p t <- gets tree let f = findFile t p case f of Nothing -> fail $ "No such file " ++ show p Just x -> liftIO (read x) -- | Internal. Mark a given path as changed, so the next sync will flush the -- modified object to disk. markChanged :: AnchoredPath -> TreeIO () markChanged p = do x <- get size <- liftIO $ case findFile (tree x) p of Just b -> BL.length `fmap` read b Nothing -> return 0 put $ x { changed = S.union paths (changed x) , changesize = changesize x + size } where paths = let (AnchoredPath x) = p in S.fromList $ map AnchoredPath $ inits x -- | Change content of a file at a given path. The change will be eventually -- flushed to disk, but might be buffered for some time. writeFile :: AnchoredPath -> BL.ByteString -> TreeIO () writeFile p con = do replaceItem p (Just blob) markChanged p maybeSync where blob = File $ Blob (return con) hash hash = Just $ hashSetSize (sha256 con) (BL.length con) createDirectory :: AnchoredPath -> TreeIO () createDirectory p = replaceItem p $ Just $ SubTree emptyTree unlink :: AnchoredPath -> TreeIO () unlink p = replaceItem p Nothing rename :: AnchoredPath -> AnchoredPath -> TreeIO () rename from to = do expandTo from tr <- gets tree let item = find tr from unless (isNothing item) $ do replaceItem to item replaceItem from Nothing -- | If buffers are becoming large, sync, otherwise do nothing. maybeSync :: TreeIO () maybeSync = do x <- gets changesize when (x > 16 * 1024 * 1024) $ get >>= sync