-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Hashed file storage support code. -- -- Support code for reading and manipulating hashed file storage (where -- each file and directory is associated with a cryptographic hash, for -- corruption-resistant storage and fast comparisons). -- -- The supported storage formats include darcs hashed pristine, a plain -- filesystem tree and an indexed plain tree (where the index maintains -- hashes of the plain files and directories). @package hashed-storage @version 0.3 module Storage.Hashed.AnchoredPath newtype Name Name :: ByteString -> Name newtype AnchoredPath AnchoredPath :: [Name] -> AnchoredPath -- | Append an element to the end of a path. appendPath :: AnchoredPath -> Name -> AnchoredPath -- | Take a root directory and an anchored path and produce a full -- path. anchorPath :: FilePath -> AnchoredPath -> FilePath isPrefix :: AnchoredPath -> AnchoredPath -> Bool parent :: AnchoredPath -> AnchoredPath parents :: AnchoredPath -> [AnchoredPath] catPaths :: AnchoredPath -> AnchoredPath -> AnchoredPath nameToFilePath :: Name -> FilePath nameFromFilePath :: FilePath -> Name floatBS :: ByteString -> AnchoredPath anchorBS :: AnchoredPath -> ByteString instance Eq AnchoredPath instance Show AnchoredPath instance Ord AnchoredPath instance Eq Name instance Show Name instance Ord Name -- | The abstract representation of a Tree and useful abstract utilities to -- handle those. module Storage.Hashed.Tree -- | Abstraction of a filesystem tree. Please note that the Tree returned -- by the respective read operations will have TreeStub items in it. To -- obtain a Tree without such stubs, call unfold on it, eg.: -- --
--   tree <- readDarcsPristine "." >>= unfold
--   
-- -- When a Tree is unfolded, it becomes final. All stubs are forced -- and the Tree can be traversed purely. Access to actual file contents -- stays in IO though. -- -- A Tree may have a Hash associated with it. A pair of Trees is -- identical whenever their hashes are (the reverse need not hold, since -- not all Trees come equipped with a hash). data Tree data Blob Blob :: !IO ByteString -> !Maybe Hash -> Blob data TreeItem File :: !Blob -> TreeItem SubTree :: !Tree -> TreeItem Stub :: !IO Tree -> !Maybe Hash -> TreeItem data ItemType BlobType :: ItemType TreeType :: ItemType newtype Hash Hash :: (Maybe Int64, ByteString) -> Hash makeTree :: [(Name, TreeItem)] -> Tree makeTreeWithHash :: [(Name, TreeItem)] -> Hash -> Tree -- | Compute a darcs-compatible hash value for a tree-like structure. darcsTreeHash :: Tree -> Hash darcsFormatDir :: Tree -> ByteString emptyTree :: Tree emptyBlob :: Blob -- | Unfold a stubbed Tree into one with no stubs in it. You might want -- to filter the tree before unfolding to save IO. unfold :: Tree -> IO Tree -- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path -- is reachable without crossing any stubs. unfoldPath :: Tree -> AnchoredPath -> IO Tree items :: Tree -> (Map Name TreeItem) -- | List all contents of a Tree. list :: Tree -> [(AnchoredPath, TreeItem)] listImmediate :: Tree -> [(Name, TreeItem)] -- | Get hash of a Tree. This is guaranteed to uniquely identify the Tree -- (including any blob content), as far as cryptographic hashes are -- concerned. Sha256 is recommended. treeHash :: Tree -> (Maybe Hash) -- | Look up a Tree item (an immediate subtree or blob). lookup :: Tree -> Name -> Maybe TreeItem -- | Find a TreeItem by its path. Gives Nothing if the path -- is invalid. find :: Tree -> AnchoredPath -> Maybe TreeItem -- | Find a Blob by its path. Gives Nothing if the path is -- invalid, or does not point to a Blob. findFile :: Tree -> AnchoredPath -> Maybe Blob -- | Find a Tree by its path. 
Gives Nothing if the path is -- invalid, or does not point to a Tree. findTree :: Tree -> AnchoredPath -> Maybe Tree -- | Get a hash of a TreeItem. May be Nothing. itemHash :: TreeItem -> Maybe Hash itemType :: TreeItem -> ItemType -- | For every pair of corresponding blobs from the two supplied trees, -- evaluate the supplied function and accumulate the results in a list. -- Hint: to get IO actions through, just use sequence on the resulting -- list. NB. This won't unfold any stubs. zipCommonFiles :: (AnchoredPath -> Blob -> Blob -> a) -> Tree -> Tree -> [a] -- | For each file in each of the two supplied trees, evaluate the supplied -- function (supplying the corresponding file from the other tree, or -- Nothing) and accumulate the results in a list. Hint: to get IO actions -- through, just use sequence on the resulting list. NB. This won't -- unfold any stubs. zipFiles :: (AnchoredPath -> Maybe Blob -> Maybe Blob -> a) -> Tree -> Tree -> [a] zipTrees :: (AnchoredPath -> Maybe TreeItem -> Maybe TreeItem -> a) -> Tree -> Tree -> [a] -- | Cautiously extracts differing subtrees from a pair of Trees. It will -- never do any unnecessary unfolding. Tree hashes are used to cut the -- comparison as high up the Tree branches as possible. The result is a -- pair of trees that do not share any identical subtrees. They are -- derived from the first and second parameters respectively and they are -- always fully unfolded. It might be advantageous to feed the result -- into zipFiles. diffTrees :: Tree -> Tree -> IO (Tree, Tree) -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use -- with care. read :: Blob -> IO ByteString -- | When implementing a Tree that has complex unfolding semantics, the -- finish IO action lets you do arbitrary IO transform on the Tree -- after it is unfolded but before it is given to the user by unfold. -- (Used to implement Index updates, eg.) 
finish :: Tree -> Tree -> IO Tree -- | Given a predicate of the form AnchoredPath -> TreeItem -> Bool, -- and a Tree, produce a Tree that only has items for which the predicate -- returned True. The tree might contain stubs. When unfolded, these will -- be subject to filtering as well. filter :: (AnchoredPath -> TreeItem -> Bool) -> Tree -> Tree -- | Given two Trees, a guide and a tree, produces a new -- Tree that is identical to tree, but only has those items -- that are present in both tree and guide. The -- guide Tree may not contain any stubs. restrict :: Tree -> Tree -> Tree modifyTree :: Tree -> AnchoredPath -> Maybe TreeItem -> Tree instance Show ItemType instance Eq ItemType module Storage.Hashed.Index hashToString :: Hash -> String -- | A recursive-ish index structure (as opposed to flat-ish structure, -- which is used by git... It turns out that it's hard to efficiently -- read a flat index with our internal data structures -- we need to turn -- the flat index into a recursive Tree object, which is rather -- expensive...). As a bonus, we can also efficiently implement subtree -- queries this way (cf. readIndex). data Item Item :: ByteString -> ByteString -> ByteString -> Ptr Int64 -> Ptr Int64 -> Item iPath :: Item -> ByteString iName :: Item -> ByteString iHash :: Item -> ByteString iSize :: Item -> Ptr Int64 iAux :: Item -> Ptr Int64 itemSize :: Item -> Int itemSizeI :: (Num a) => Item -> a itemIsDir :: Item -> Bool createItem :: ItemType -> AnchoredPath -> ForeignPtr () -> Int -> IO Item peekItem :: ForeignPtr () -> Int -> Maybe Int -> IO Item -- | Update an existing item with new hash and optionally mtime (give -- Nothing when updating directory entries). update :: Item -> Maybe EpochTime -> Hash -> IO () iHash' :: Item -> IO Hash mmapIndex :: Int -> IO (ForeignPtr a, Int) -- | See readIndex. This version also gives a map from paths to -- items, so the extra per-item data can be used (hash and mtime) -- directly. 
The map is in the form of an IORef, since the data is not -- available until the tree is unfolded. readIndex' :: IO (Tree, IORef (Map AnchoredPath Item)) -- | Read an index and build up a Tree object from it, referring to current -- working directory. Any parts of the index that are out of date are -- updated in-place. The result is always an up-to-date index. Also, the -- Tree is stubby and only the pieces of the index that are unfolded will -- be actually updated! To implement a subtree query, you can use -- Tree.filter and then unfold the result. Otherwise just unfold the -- whole tree to avoid unexpected problems. readIndex :: IO Tree -- | Will add and remove files in index to make it match the Tree object -- given (it is an error for the Tree to contain a file or directory that -- does not exist in a plain form under FilePath). updateIndexFrom :: Tree -> IO Tree instance Show Item module Storage.Hashed.Diff unidiff :: Tree -> Tree -> IO ByteString type Line = ByteString data WeaveLine Common :: Line -> WeaveLine Remove :: Line -> WeaveLine Add :: Line -> WeaveLine Replace :: Line -> Line -> WeaveLine Skip :: Int -> WeaveLine -- | A weave -- two files woven together, with common and differing regions -- marked up. Cf. WeaveLine. type Weave = [WeaveLine] -- | Sort of a sub-weave. type Hunk = [WeaveLine] -- | Produce unified diff (in a string form, ie. formatted) from a pair of -- bytestrings. unifiedDiff :: ByteString -> ByteString -> ByteString -- | Weave two bytestrings. Intermediate data structure for the actual -- unidiff implementation. No skips are produced. weave :: ByteString -> ByteString -> Weave -- | Break up a Weave into hunks. hunks :: Weave -> [Hunk] -- | Reformat a Hunk into a format suitable for unified diff. Replaces are -- turned into add/remove pairs, all removals in a hunk go before all -- adds. Hunks of Common lines are left intact. Produces input suitable -- for reduceContext. 
unifyHunk :: Hunk -> Hunk -- | Break up a Weave into unified hunks, leaving n lines -- of context around every hunk. Consecutive Common lines not used as -- context are replaced with Skips. reduceContext :: Int -> [Hunk] -> [Hunk] -- | Format a Weave for printing. deweave :: Weave -> ByteString -- | Print a hunked weave in the form of a unified diff. Hunk -- boundaries are marked up as Skip lines. Cf. -- reduceContext. printUnified :: Weave -> ByteString instance Show WeaveLine -- | An experimental monadic interface to Tree mutation. The main idea is -- to simulate IO-ish manipulation of real filesystem (that's the state -- part of the monad), and to keep memory usage down by reasonably often -- dumping the intermediate data to disk and forgetting it. XXX This -- currently does not work as advertised and the monads leak memory. So -- far, I'm at a loss why this happens. module Storage.Hashed.Monad -- | Run a TreeIO action in a hashed setting. The initial -- tree is assumed to be fully available from the directory, and -- any changes will be written out to same. Please note that actual -- filesystem files are never removed. -- -- XXX This somehow manages to leak memory, somewhere. hashedTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree) -- | Run a TreeIO action in a plain tree setting. Writes out changes to the -- plain tree every now and then (after the action is finished, the last -- tree state is always flushed to disk). XXX Modify the tree with -- filesystem reading and put it back into st (ie. replace the in-memory -- Blobs with normal ones, so the memory can be GCd). plainTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree) -- | Run a TreeIO action without dumping anything to disk. Useful for -- running tree mutations just for the purpose of getting the resulting -- Tree and throwing it away. virtualTreeIO :: TreeIO a -> Tree -> IO (a, Tree) -- | Grab content of a file in the current Tree at the given path. 
readFile :: AnchoredPath -> TreeIO ByteString -- | Change content of a file at a given path. The change will be -- eventually flushed to disk, but might be buffered for some time. writeFile :: AnchoredPath -> ByteString -> TreeIO () createDirectory :: AnchoredPath -> TreeIO () rename :: AnchoredPath -> AnchoredPath -> TreeIO () unlink :: AnchoredPath -> TreeIO () tree :: TreeState -> Tree cwd :: TreeState -> AnchoredPath data TreeState module Storage.Hashed -- | Read in a plain directory hierarchy from a filesystem. NB. The -- read function on Blobs with such a Tree is susceptible to file -- content changes. Since we use mmap in read, this will break -- referential transparency and produce unexpected results. Please always -- make sure that all parallel access to the underlying filesystem tree -- never mutates files. Unlink + recreate is fine though (in other words, -- the sync/write operations below are safe). readPlainTree :: FilePath -> IO Tree -- | Read in a darcs-style hashed tree. This is mainly useful for reading -- "pristine.hashed". You need to provide the root hash you are -- interested in (found in _darcs/hashed_inventory). readDarcsHashed :: FilePath -> Hash -> IO Tree -- | Read in a darcs pristine tree. Handles the plain and hashed pristine -- cases. Does not (and will not) handle the no-pristine case, since that -- requires replaying patches. Cf. readDarcsHashed and -- readPlainTree that are used to do the actual Tree -- construction. readDarcsPristine :: FilePath -> IO Tree -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use -- with care. read :: Blob -> IO ByteString -- | Read in a FileSegment into a Lazy ByteString. Implemented using mmap. readSegment :: FileSegment -> IO ByteString -- | Write out *full* tree to a plain directory structure. If you instead -- want to make incremental updates, refer to Monad.plainTreeIO. writePlainTree :: Tree -> FilePath -> IO () -- | Take a relative FilePath and turn it into an AnchoredPath. 
The -- operation is unsafe and if you break it, you keep both pieces. More -- useful for exploratory purposes (ghci) than for serious programming. floatPath :: FilePath -> AnchoredPath -- | Take a relative FilePath within a Tree and print the contents of the -- object there. Useful for exploration, less so for serious programming. printPath :: Tree -> FilePath -> IO ()