-- | The abstract representation of a Tree and useful abstract utilities to -- handle those. module Storage.Hashed.Tree ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..) , makeTree, makeTreeWithHash, darcsTreeHash, darcsFormatDir, emptyTree , emptyBlob -- * Unfolding stubbed (lazy) Trees. -- -- | By default, Tree obtained by a read function is stubbed: it will -- contain Stub items that need to be executed in order to access the -- respective subtrees. 'unfold' will produce an unstubbed Tree. , unfold, unfoldPath -- * Tree access and lookup. , items, list, listImmediate, treeHash , lookup, find, findFile, findTree, itemHash, itemType , zipCommonFiles, zipFiles, zipTrees, diffTrees -- * Files (Blobs). , read -- * Manipulating trees. , finish, filter, restrict, modifyTree ) where import Prelude hiding( lookup, filter, read, all ) import Storage.Hashed.AnchoredPath import Storage.Hashed.Utils( Hash(..), sha256, hashSetSize, darcsFormatHash ) import qualified Data.ByteString.Char8 as BS import qualified Data.ByteString.Lazy.Char8 as BL import qualified Data.Map as M import Data.Maybe( catMaybes ) import Data.List( sortBy, union, sort ) -------------------------------- -- Tree, Blob and friends -- data Blob = Blob !(IO BL.ByteString) !(Maybe Hash) data TreeItem = File !Blob | SubTree !Tree | Stub !(IO Tree) !(Maybe Hash) data ItemType = BlobType | TreeType deriving (Show, Eq) -- | Abstraction of a filesystem tree. -- Please note that the Tree returned by the respective read operations will -- have TreeStub items in it. To obtain a Tree without such stubs, call -- unfold on it, eg.: -- -- > tree <- readDarcsPristine "." >>= unfold -- -- When a Tree is unfolded, it becomes "final". All stubs are forced and the -- Tree can be traversed purely. Access to actual file contents stays in IO -- though. -- -- A Tree may have a Hash associated with it. A pair of Tree's is identical -- whenever their hashes are (the reverse need not hold, since not all Trees -- come equipped with a hash). data Tree = Tree { items :: (M.Map Name TreeItem) , listImmediate :: [(Name, TreeItem)] -- | Get hash of a Tree. This is guaranteed to uniquely -- identify the Tree (including any blob content), as far as -- cryptographic hashes are concerned. Sha256 is recommended. , treeHash :: !(Maybe Hash) -- | When implementing a Tree that has complex unfolding -- semantics, the "finish" IO action lets you do arbitrary IO -- transform on the Tree after it is unfolded but before it is -- given to the user by unfold. (Used to implement Index -- updates, eg.) , finish :: Tree -> IO Tree } -- | Get a hash of a TreeItem. May be Nothing. itemHash :: TreeItem -> Maybe Hash itemHash (File (Blob _ h)) = h itemHash (SubTree t) = treeHash t itemHash (Stub _ h) = h itemType :: TreeItem -> ItemType itemType (File _) = BlobType itemType (SubTree _) = TreeType itemType (Stub _ _) = TreeType emptyTree :: Tree emptyTree = Tree { items = M.empty , listImmediate = [] , treeHash = Nothing , finish = return } emptyBlob :: Blob emptyBlob = Blob (return BL.empty) Nothing makeTree :: [(Name,TreeItem)] -> Tree makeTree l = Tree { items = M.fromList l , listImmediate = l , treeHash = Nothing , finish = return } makeTreeWithHash :: [(Name,TreeItem)] -> Hash -> Tree makeTreeWithHash l h = Tree { items = M.fromList l , listImmediate = l , treeHash = Just h , finish = return } darcsFormatDir :: Tree -> BL.ByteString darcsFormatDir t = BL.fromChunks $ concatMap string (sortBy cmp $ listImmediate t) where cmp (Name a, _) (Name b, _) = compare a b string (Name name, item) = [ case item of File _ -> BS.pack "file:\n" SubTree _ -> BS.pack "directory:\n" Stub _ _ -> error "Trees with stubs not supported in darcsFormatDir.", name, BS.singleton '\n', case itemHash item of Nothing -> error $ "darcsFormatDir: missing hash on " ++ show name Just h -> darcsFormatHash h, BS.singleton '\n' ] -- | Compute a darcs-compatible hash value for a tree-like structure. darcsTreeHash :: Tree -> Hash darcsTreeHash d = hashSetSize (sha256 bl) $ BL.length bl where bl = darcsFormatDir d ----------------------------------- -- Tree access and lookup -- -- | Look up a 'Tree' item (an immediate subtree or blob). lookup :: Tree -> Name -> Maybe TreeItem lookup t n = M.lookup n (items t) find' :: TreeItem -> AnchoredPath -> Maybe TreeItem find' t (AnchoredPath []) = Just t find' (SubTree t) (AnchoredPath (d : rest)) = case lookup t d of Just sub -> find' sub (AnchoredPath rest) Nothing -> Nothing find' _ _ = Nothing -- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid. find :: Tree -> AnchoredPath -> Maybe TreeItem find = find' . SubTree -- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does -- not point to a Blob. findFile :: Tree -> AnchoredPath -> Maybe Blob findFile t p = case find t p of Just (File x) -> Just x _ -> Nothing -- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does -- not point to a Tree. findTree :: Tree -> AnchoredPath -> Maybe Tree findTree t p = case find t p of Just (SubTree x) -> Just x _ -> Nothing -- | List all contents of a 'Tree'. list :: Tree -> [(AnchoredPath, TreeItem)] list t_ = paths t_ (AnchoredPath []) where paths t p = [ (appendPath p n, i) | (n,i) <- listImmediate t ] ++ concat [ paths subt (appendPath p subn) | (subn, SubTree subt) <- listImmediate t ] -- | Unfold a stubbed Tree into a one with no stubs in it. You might want to -- filter the tree before unfolding to save IO. unfold :: Tree -> IO Tree unfold t_ = unfold' t_ (AnchoredPath []) where unfold' :: Tree -> AnchoredPath -> IO Tree unfold' t path = do unfolded <- sequence [ item n t' path | (n, Stub t' _) <- listImmediate t ] let orig = M.filter (not . isStub) (items t) orig_l = [ i | i <- listImmediate t, not $ isStub $ snd i ] m_unfolded = M.fromList unfolded tree = t { items = M.union orig m_unfolded , listImmediate = orig_l ++ unfolded } finish tree tree subtree name stub path = do let npath = appendPath path name tree <- stub sub <- unfold' tree npath return (name, SubTree sub) item = subtree isStub (Stub _ _) = True isStub _ = False -- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is -- reachable without crossing any stubs. unfoldPath :: Tree -> AnchoredPath -> IO Tree unfoldPath t_ path_ = do unfold' t_ path_ where unfold' t (AnchoredPath [_]) = return t unfold' t (AnchoredPath (n:rest)) = do case lookup t n of (Just (Stub stub _)) -> do unstubbed <- stub amend t n rest unstubbed (Just (SubTree t')) -> amend t n rest t' _ -> fail $ "Descent error in unfoldPath: " ++ show path_ amend t name rest sub = do t' <- unfold' sub (AnchoredPath rest) let orig_l = [ i | i@(n',_) <- listImmediate t, name /= n' ] tree = t { items = M.insert name (SubTree sub) (items t) , listImmediate = (name, SubTree sub) : orig_l } return tree -- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a -- identical to @tree@, but only has those items that are present in both -- @tree@ and @guide@. The @guide@ Tree may not contain any stubs. restrict :: Tree -> Tree -> Tree restrict guide tree = filter accept tree where accept path item = case (find guide path, item) of (Just (SubTree _), SubTree _) -> True (Just (SubTree _), Stub _ _) -> True (Just (File _), File _) -> True (Just (Stub _ _), _) -> error "*sulk* Go away, you, you precondition violator!" (_, _) -> False -- | Given a predicate of the form AnchoredPath -> TreeItem -> Bool, and a -- Tree, produce a Tree that only has items for which the predicate returned -- True. The tree might contain stubs. When unfolded, these will be subject to -- filtering as well. filter :: (AnchoredPath -> TreeItem -> Bool) -> Tree -> Tree filter predicate t_ = filter' t_ (AnchoredPath []) where filter' t path = let subs = (catMaybes [ (,) name `fmap` wibble path name item | (name,item) <- listImmediate t ]) in t { items = M.mapMaybeWithKey (wibble path) $ items t , listImmediate = subs , treeHash = Nothing } wibble path name item = let npath = path `appendPath` name in if predicate npath item then Just $ filterSub npath item else Nothing filterSub npath (SubTree t) = SubTree $ filter' t npath filterSub npath (Stub stub _) = Stub (do x <- stub return $ filter' x npath) Nothing filterSub _ x = x -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with -- care. read :: Blob -> IO BL.ByteString read (Blob r _) = r -- | For every pair of corresponding blobs from the two supplied trees, -- evaluate the supplied function and accumulate the results in a list. Hint: -- to get IO actions through, just use sequence on the resulting list. -- NB. This won't unfold any stubs. zipCommonFiles :: (AnchoredPath -> Blob -> Blob -> a) -> Tree -> Tree -> [a] zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p | (p, File x) <- list b ] -- | For each file in each of the two supplied trees, evaluate the supplied -- function (supplying the corresponding file from the other tree, or Nothing) -- and accumulate the results in a list. Hint: to get IO actions through, just -- use sequence on the resulting list. NB. This won't unfold any stubs. zipFiles :: (AnchoredPath -> Maybe Blob -> Maybe Blob -> a) -> Tree -> Tree -> [a] zipFiles f a b = [ f p (findFile a p) (findFile b p) | p <- paths a `union` paths b ] where paths t = sort [ p | (p, File _) <- list t ] zipTrees :: (AnchoredPath -> Maybe TreeItem -> Maybe TreeItem -> a) -> Tree -> Tree -> [a] zipTrees f a b = [ f p (find a p) (find b p) | p <- reverse (paths a `union` paths b) ] where paths t = sort [ p | (p, _) <- list t ] -- | Cautiously extracts differing subtrees from a pair of Trees. It will never -- do any unneccessary unfolding. Tree hashes are used to cut the comparison as -- high up the Tree branches as possible. The result is a pair of trees that do -- not share any identical subtrees. They are derived from the first and second -- parameters respectively and they are always fully unfolded. It might be -- advantageous to feed the result into 'zipFiles'. diffTrees :: Tree -> Tree -> IO (Tree, Tree) diffTrees left right = if treeHash left `match` treeHash right then return (emptyTree, emptyTree) else diff left right where match (Just h) (Just j) | h == j = True match _ _ = False isFile (File _) = True isFile _ = False notFile = not . isFile subtree :: TreeItem -> IO Tree subtree (Stub x _) = x subtree (SubTree x) = return x subtree (File _) = error "diffTrees tried to descend a File as a subtree" maybeUnfold (Stub x _) = SubTree `fmap` (x >>= unfold) maybeUnfold (SubTree x) = SubTree `fmap` unfold x maybeUnfold i = return i immediateN t = [ n | (n, _) <- listImmediate t ] diff left' right' = do is <- sequence [ case (lookup left' n, lookup right' n) of (Just l, Nothing) -> do l' <- maybeUnfold l return (n, Just l', Nothing) (Nothing, Just r) -> do r' <- maybeUnfold r return (n, Nothing, Just r') (Just l, Just r) | itemHash l `match` itemHash r -> return (n, Nothing, Nothing) | notFile l && notFile r -> do x <- subtree l y <- subtree r (x', y') <- diffTrees x y return (n, Just $ SubTree x', Just $ SubTree y') | isFile l && isFile r -> return (n, Just l, Just r) | otherwise -> do l' <- maybeUnfold l r' <- maybeUnfold r return (n, Just l', Just r') _ -> error "n lookups failed" | n <- immediateN left' `union` immediateN right' ] let is_l = [ (n, l) | (n, Just l ,_) <- is ] is_r = [ (n, r) | (n, _, Just r) <- is ] return (makeTree is_l, makeTree is_r) modifyTree :: Tree -> AnchoredPath -> Maybe TreeItem -> Tree modifyTree _ (AnchoredPath []) (Just (SubTree sub)) = sub modifyTree t (AnchoredPath [n]) (Just item) = t { items = M.insert n item (items t) , listImmediate = (n,item) : subs , treeHash = Nothing } where subs = [ x | x@(n', _) <- listImmediate t, n /= n' ] modifyTree t (AnchoredPath [n]) Nothing = t { items = M.delete n (items t) , listImmediate = subs , treeHash = Nothing } where subs = [ x | x@(n', _) <- listImmediate t, n /= n' ] modifyTree t path@(AnchoredPath (n:r)) item = t { items = M.insert n sub (items t) , listImmediate = (n,sub) : subs , treeHash = Nothing } where subs = [ x | x@(n', _) <- listImmediate t, n /= n' ] modSubtree s = modifyTree s (AnchoredPath r) item sub = case lookup t n of Just (SubTree s) -> SubTree $ modSubtree s Just (Stub s _) -> Stub (do x <- s return $ modSubtree x) Nothing Nothing -> SubTree $ modSubtree emptyTree _ -> error $ "Modify tree at " ++ show path modifyTree _ (AnchoredPath []) (Just (Stub _ _)) = error "Bug in descent in modifyTree." modifyTree _ (AnchoredPath []) (Just (File _)) = error "Bug in descent in modifyTree." modifyTree _ (AnchoredPath []) Nothing = error "Bug in descent in modifyTree."