-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Hashed file storage support code. -- -- Support code for reading and manipulating hashed file storage (where -- each file and directory is associated with a cryptographic hash, for -- corruption-resistant storage and fast comparisons). -- -- The supported storage formats include darcs hashed pristine, a plain -- filesystem tree and an indexed plain tree (where the index maintains -- hashes of the plain files and directories). @package hashed-storage @version 0.4.2 module Storage.Hashed.Hash data Hash SHA256 :: !ByteString -> Hash SHA1 :: !ByteString -> Hash NoHash :: Hash encodeBase64u :: Hash -> ByteString -- | Take a base64/url-encoded string and decode it as a Hash. If -- the string is malformed, yields NoHash. decodeBase64u :: ByteString -> Hash -- | Produce a base16 (ascii-hex) encoded string from a hash. This can be -- turned back into a Hash (see decodeBase16. This is a loss-less -- process. encodeBase16 :: Hash -> ByteString -- | Take a base16-encoded string and decode it as a Hash. If the -- string is malformed, yields NoHash. decodeBase16 :: ByteString -> Hash -- | Compute a sha256 of a (lazy) ByteString. However, although this works -- correctly for any bytestring, it is only efficient if the bytestring -- only has a sigle chunk. sha256 :: ByteString -> Hash rawHash :: Hash -> ByteString match :: Hash -> Hash -> Bool instance Show Hash instance Eq Hash instance Ord Hash instance Read Hash -- | This module implements relative paths within a Tree. All paths are -- anchored at a certain root (this is usually the Tree root). They are -- represented by a list of Names (these are just strict bytestrings). module Storage.Hashed.AnchoredPath newtype Name Name :: ByteString -> Name -- | This is a type of sane file paths. These are always canonic in -- the sense that there are no stray slashes, no .. components and -- similar. They are usually used to refer to a location within a Tree, -- but a relative filesystem path works just as well. These are either -- constructed from individual name components (using appendPath, -- catPaths and makeName), or converted from a FilePath -- (floatPath -- but take care when doing that) or . newtype AnchoredPath AnchoredPath :: [Name] -> AnchoredPath anchoredRoot :: AnchoredPath -- | Append an element to the end of a path. appendPath :: AnchoredPath -> Name -> AnchoredPath -- | Take a root directory and an anchored path and produce a full -- FilePath. Moreover, you can use anchorPath "" to get a -- relative FilePath. anchorPath :: FilePath -> AnchoredPath -> FilePath -- | Check whether a path is a prefix of another path. isPrefix :: AnchoredPath -> AnchoredPath -> Bool -- | Get parent (path) of a given path. foobarbaz -> foo/bar parent :: AnchoredPath -> AnchoredPath -- | List all parents of a given path. foobarbaz -> [foo, -- foo/bar] parents :: AnchoredPath -> [AnchoredPath] -- | Catenate two paths together. Not very safe, but sometimes useful (e.g. -- when you are representing paths relative to a different point than a -- Tree root). catPaths :: AnchoredPath -> AnchoredPath -> AnchoredPath flatten :: AnchoredPath -> ByteString makeName :: String -> Name -- | Unsafe. Only ever use on bytestrings that came from flatten on a -- pre-existing AnchoredPath. floatBS :: ByteString -> AnchoredPath -- | Take a relative FilePath and turn it into an AnchoredPath. The -- operation is (relatively) unsafe. Basically, by using floatPath, you -- are testifying that the argument is a path relative to some common -- root -- i.e. the root of the associated Tree object. Also, -- there are certain invariants about AnchoredPath that this function -- tries hard to preserve, but probably cannot guarantee (i.e. this is a -- best-effort thing). You should sanitize any FilePaths before you -- declare them good by converting into AnchoredPath (using this -- function). floatPath :: FilePath -> AnchoredPath instance Eq AnchoredPath instance Show AnchoredPath instance Ord AnchoredPath instance Eq Name instance Show Name instance Ord Name -- | The abstract representation of a Tree and useful abstract utilities to -- handle those. module Storage.Hashed.Tree -- | Abstraction of a filesystem tree. Please note that the Tree returned -- by the respective read operations will have TreeStub items in it. To -- obtain a Tree without such stubs, call expand on it, eg.: -- --
--   tree <- readDarcsPristine "." >>= expand
--   
-- -- When a Tree is expanded, it becomes "final". All stubs are forced and -- the Tree can be traversed purely. Access to actual file contents stays -- in IO though. -- -- A Tree may have a Hash associated with it. A pair of Tree's is -- identical whenever their hashes are (the reverse need not hold, since -- not all Trees come equipped with a hash). data Tree m data Blob m Blob :: !m ByteString -> !Hash -> Blob m data TreeItem m File :: !Blob m -> TreeItem m SubTree :: !Tree m -> TreeItem m Stub :: !m (Tree m) -> !Hash -> TreeItem m data ItemType BlobType :: ItemType TreeType :: ItemType data Hash SHA256 :: !ByteString -> Hash SHA1 :: !ByteString -> Hash NoHash :: Hash makeTree :: (Monad m) => [(Name, TreeItem m)] -> Tree m makeTreeWithHash :: (Monad m) => [(Name, TreeItem m)] -> Hash -> Tree m emptyTree :: (Monad m) => Tree m emptyBlob :: (Monad m) => Blob m makeBlob :: (Monad m) => ByteString -> Blob m makeBlobBS :: (Monad m) => ByteString -> Blob m expandUpdate :: (Monad m) => (AnchoredPath -> Tree m -> m (Tree m)) -> Tree m -> m (Tree m) -- | Expand a stubbed Tree into a one with no stubs in it. You might want -- to filter the tree before expanding to save IO. This is the basic -- implementation, which may be overriden by some Tree instances (this is -- especially true of the Index case). expand :: (Monad m) => Tree m -> m (Tree m) -- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path -- is reachable without crossing any stubs. expandPath :: (Monad m) => Tree m -> AnchoredPath -> m (Tree m) items :: Tree m -> Map Name (TreeItem m) -- | List all contents of a Tree. list :: Tree m -> [(AnchoredPath, TreeItem m)] listImmediate :: Tree m -> [(Name, TreeItem m)] -- | Get hash of a Tree. This is guaranteed to uniquely identify the Tree -- (including any blob content), as far as cryptographic hashes are -- concerned. Sha256 is recommended. treeHash :: Tree m -> Hash -- | Look up a Tree item (an immediate subtree or blob). lookup :: Tree m -> Name -> Maybe (TreeItem m) -- | Find a TreeItem by its path. Gives Nothing if the path -- is invalid. find :: Tree m -> AnchoredPath -> Maybe (TreeItem m) -- | Find a Blob by its path. Gives Nothing if the path is -- invalid, or does not point to a Blob. findFile :: Tree m -> AnchoredPath -> Maybe (Blob m) -- | Find a Tree by its path. Gives Nothing if the path is -- invalid, or does not point to a Tree. findTree :: Tree m -> AnchoredPath -> Maybe (Tree m) -- | Get a hash of a TreeItem. May be Nothing. itemHash :: TreeItem m -> Hash itemType :: TreeItem m -> ItemType -- | For every pair of corresponding blobs from the two supplied trees, -- evaluate the supplied function and accumulate the results in a list. -- Hint: to get IO actions through, just use sequence on the resulting -- list. NB. This won't expand any stubs. zipCommonFiles :: (AnchoredPath -> Blob m -> Blob m -> a) -> Tree m -> Tree m -> [a] -- | For each file in each of the two supplied trees, evaluate the supplied -- function (supplying the corresponding file from the other tree, or -- Nothing) and accumulate the results in a list. Hint: to get IO actions -- through, just use sequence on the resulting list. NB. This won't -- expand any stubs. zipFiles :: (AnchoredPath -> Maybe (Blob m) -> Maybe (Blob m) -> a) -> Tree m -> Tree m -> [a] zipTrees :: (AnchoredPath -> Maybe (TreeItem m) -> Maybe (TreeItem m) -> a) -> Tree m -> Tree m -> [a] -- | Cautiously extracts differing subtrees from a pair of Trees. It will -- never do any unneccessary expanding. Tree hashes are used to cut the -- comparison as high up the Tree branches as possible. The result is a -- pair of trees that do not share any identical subtrees. They are -- derived from the first and second parameters respectively and they are -- always fully expanded. It might be advantageous to feed the result -- into zipFiles or zipTrees. diffTrees :: (Functor m, Monad m) => Tree m -> Tree m -> m (Tree m, Tree m) -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use -- with care. readBlob :: Blob m -> m ByteString class (Monad m) => FilterTree a m filter :: (FilterTree a m) => (AnchoredPath -> TreeItem m -> Bool) -> a m -> a m -- | Given two Trees, a guide and a tree, produces a new -- Tree that is a identical to tree, but only has those items -- that are present in both tree and guide. The -- guide Tree may not contain any stubs. restrict :: (FilterTree t m, Monad n) => Tree n -> t m -> t m modifyTree :: (Monad m) => Tree m -> AnchoredPath -> Maybe (TreeItem m) -> Tree m -- | Does not expand the tree. updateTree :: (Functor m, Monad m) => (TreeItem m -> m (TreeItem m)) -> Tree m -> m (Tree m) updateSubtrees :: (Tree m -> Tree m) -> Tree m -> Tree m -- | Lay one tree over another. The resulting Tree will look like the base -- (1st parameter) Tree, although any items also present in the overlay -- Tree will be taken from the overlay. It is not allowed to overlay a -- different kind of an object, nor it is allowed for the overlay to add -- new objects to base. This means that the overlay Tree should be a -- subset of the base Tree (although any extraneous items will be ignored -- by the implementation). overlay :: (Functor m, Monad m) => Tree m -> Tree m -> Tree m instance Show ItemType instance Eq ItemType instance (Monad m) => FilterTree Tree m -- | This module contains plain tree indexing code. The index itself is a -- CACHE: you should only ever use it as an optimisation and never as a -- primary storage. In practice, this means that when we change index -- format, the application is expected to throw the old index away and -- build a fresh index. Please note that tracking index validity is out -- of scope for this library: this is responsibility of your application. -- It is advisable that in your validity tracking code, you also check -- for format validity (see indexFormatValid) and scrap and -- re-create index when needed. -- -- The index is a binary file that overlays a hashed tree over the -- working copy. This means that every working file and directory has an -- entry in the index, that contains its path and hash and validity data. -- The validity data is a timestamp plus the file size. The file hashes -- are sha256's of the file's content. -- -- There are two entry types, a file entry and a directory entry. Both -- have a common binary format (see Item). The on-disk format is -- best described by peekItem. -- -- For each file, the index has a copy of the file's last modification -- timestamp taken at the instant when the hash has been computed. This -- means that when file size and timestamp of a file in working copy -- matches those in the index, we assume that the hash stored in the -- index for given file is valid. These hashes are then exposed in the -- resulting Tree object, and can be leveraged by eg. -- diffTrees to compare many files quickly. -- -- You may have noticed that we also keep hashes of directories. These -- are assumed to be valid whenever the complete subtree has been valid. -- At any point, as soon as a size or timestamp mismatch is found, the -- working file in question is opened, its hash (and timestamp and size) -- is recomputed and updated in-place in the index file (everything lives -- at a fixed offset and is fixed size, so this isn't an issue). This is -- also true of directories: when a file in a directory changes hash, -- this triggers recomputation of all of its parent directory hashes; -- moreover this is done efficiently -- each directory is updated at most -- once during an update run. module Storage.Hashed.Index -- | Read an index and build up a Tree object from it, referring to -- current working directory. The initial Index object returned by -- readIndex is not directly useful. However, you can use -- Tree.filter on it. Either way, to obtain the actual Tree -- object, call update. -- -- The usual use pattern is this: -- --
--   do (idx, update) <- readIndex
--      tree <- update =<< filter predicate idx
--   
-- -- The resulting tree will be fully expanded. readIndex :: FilePath -> (Tree IO -> Hash) -> IO Index -- | Will add and remove files in index to make it match the Tree -- object given (it is an error for the Tree to contain a file or -- directory that does not exist in a plain form in current working -- directory). updateIndexFrom :: FilePath -> (Tree IO -> Hash) -> Tree IO -> IO Index -- | Check that a given file is an index file with a format we can handle. -- You should remove and re-create the index whenever this is not true. indexFormatValid :: FilePath -> IO Bool updateIndex :: Index -> IO (Tree IO) type Index = IndexM IO -- | Given pred tree, produce a Tree that only has items -- for which pred returns True. The tree might contain -- stubs. When expanded, these will be subject to filtering as well. filter :: (FilterTree a m) => (AnchoredPath -> TreeItem m -> Bool) -> a m -> a m instance Show Item instance FilterTree IndexM IO -- | An experimental monadic interface to Tree mutation. The main idea is -- to simulate IO-ish manipulation of real filesystem (that's the state -- part of the monad), and to keep memory usage down by reasonably often -- dumping the intermediate data to disk and forgetting it. The monad -- interface itself is generic, and a number of actual implementations -- can be used. This module provides just virtualTreeIO that never -- writes any changes, but may trigger filesystem reads as appropriate. -- -- XXX This currently does not work as advertised and the monads leak -- memory. So far, I'm at a loss why this happens. module Storage.Hashed.Monad virtualTreeIO :: TreeIO a -> Tree IO -> IO (a, Tree IO) -- | Run a TreeIO action without storing any changes. This is useful for -- running monadic tree mutations for obtaining the resulting Tree (as -- opposed to their effect of writing a modified tree to disk). The -- actions can do both read and write -- reads are passed through to the -- actual filesystem, but the writes are held in memory in a form of -- modified Tree. virtualTreeMonad :: (Monad m) => TreeMonad m a -> Tree m -> m (a, Tree m) -- | Grab content of a file in the current Tree at the given path. readFile :: (TreeRO m, MonadError e m) => AnchoredPath -> m ByteString -- | Change content of a file at a given path. The change will be -- eventually flushed to disk, but might be buffered for some time. writeFile :: (TreeRW m, MonadError e m) => AnchoredPath -> ByteString -> m () createDirectory :: (TreeRW m, MonadError e m) => AnchoredPath -> m () rename :: (TreeRW m, MonadError e m) => AnchoredPath -> AnchoredPath -> m () unlink :: (TreeRW m, MonadError e m) => AnchoredPath -> m () -- | Check for existence of a file. fileExists :: (TreeRO m, MonadError e m) => AnchoredPath -> m Bool -- | Check for existence of a directory. directoryExists :: (TreeRO m, MonadError e m) => AnchoredPath -> m Bool -- | Check for existence of a node (file or directory, doesn't matter). exists :: (TreeRO m, MonadError e m) => AnchoredPath -> m Bool withDirectory :: (TreeRO m, MonadError e m) => AnchoredPath -> m a -> m a tree :: TreeState m -> Tree m -- | Internal state of the TreeIO monad. Keeps track of the current -- Tree content, unsync'd changes and a current working directory (of the -- monad). data TreeState m -- | A TreeIO monad. A sort of like IO but it keeps a -- TreeState around as well, which is a sort of virtual -- filesystem. Depending on how you obtained your TreeIO, the -- actions in your virtual filesystem get somehow reflected in the actual -- real filesystem. For virtualTreeIO, nothing happens in real -- filesystem, however with plainTreeIO, the plain tree will be -- updated every now and then, and with hashedTreeIO a -- darcs-style hashed tree will get updated. type TreeMonad m = RWST AnchoredPath () (TreeState m) m type TreeIO = TreeMonad IO runTreeMonad :: (Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m) type PathSet = Set AnchoredPath initialState :: Tree m -> (PathSet -> TreeMonad m ()) -> TreeState m replaceItem :: (MonadError e m, Monad m) => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m () instance (Functor m, Monad m, MonadError e m) => TreeRW (TreeMonad m) instance (Monad m, MonadError e m) => TreeRO (TreeMonad m) -- | The plain format implementation resides in this module. The plain -- format does not use any hashing and basically just wraps a normal -- filesystem tree in the hashed-storage API. -- -- NB. The read function on Blobs coming from a plain tree is -- susceptible to file content changes. Since we use mmap in read, -- this will break referential transparency and produce unexpected -- results. Please always make sure that all parallel access to the -- underlying filesystem tree never mutates files. Unlink + recreate is -- fine though (in other words, the writePlainTree and -- plainTreeIO implemented in this module are safe in this -- respect). module Storage.Hashed.Plain readPlainTree :: FilePath -> IO (Tree IO) -- | Write out full tree to a plain directory structure. If you -- instead want to make incremental updates, refer to -- Storage.Hashed.Monad. writePlainTree :: Tree IO -> FilePath -> IO () -- | Run a TreeIO action in a plain tree setting. Writes out changes -- to the plain tree every now and then (after the action is finished, -- the last tree state is always flushed to disk). XXX Modify the tree -- with filesystem reading and put it back into st (ie. replace the -- in-memory Blobs with normal ones, so the memory can be GCd). plainTreeIO :: TreeIO a -> Tree IO -> FilePath -> IO (a, Tree IO) -- | This module implements an object storage. This is a directory -- on disk containing a content-addressed storage. This is useful for -- storing all kinds of things, particularly filesystem trees, or darcs -- pristine caches and patch objects. However, this is an abstract, flat -- storage: no tree semantics are provided. You just need to provide a -- reference-collecting functionality, computing a list of references for -- any given object. The system provides transparent garbage collection -- and packing. module Storage.Hashed.Packed -- | On-disk format for object storage: we implement a completely loose -- format (one file per object), a compact format stored in a single -- append-only file and an immutable "pack" format. data Format Loose :: Format Compact :: Format Pack :: Format -- | Object storage block. When used as a hatchery, the loose or compact -- format are preferable, while for mature space, the pack format is more -- useful. data Block -- | Object storage. Contains a single "hatchery" and possibly a number of -- mature space blocks, usually in form of packs. It also keeps a list of -- root pointers and has a way to extract pointers from objects -- (externally supplied). These last two things are used to implement a -- simple GC. data OS -- | Add new objects to the object storage (i.e. put them into hatchery). -- It is safe to call this even on objects that are already present in -- the storage: such objects will be skipped. hatch :: OS -> [ByteString] -> IO OS -- | Move things from hatchery into a (new) pack. compact :: OS -> IO OS -- | Reduce number of packs in the object storage. This may both recombine -- packs to eliminate dead objects and join some packs to form bigger -- packs. repack :: OS -> IO OS lookup :: OS -> Hash -> IO (Maybe FileSegment) -- | Create an empty object storage in given directory, with a hatchery of -- given format. The directory is created if needed, but is assumed to be -- empty. create :: FilePath -> Format -> IO OS load :: FilePath -> IO OS format :: Block -> Format blockLookup :: Block -> Hash -> IO (Maybe FileSegment) -- | Build a map of live objects (i.e. those reachable from the given -- roots) in a given list of Blocks. live :: OS -> [Block] -> IO (Map Hash FileSegment) hatchery :: OS -> Block mature :: OS -> [Block] roots :: OS -> [Hash] references :: OS -> FileSegment -> IO [Hash] rootdir :: OS -> FilePath instance Show Format instance Eq Format -- | A few darcs-specific utility functions. These are used for reading and -- writing darcs and darcs-compatible hashed trees. module Storage.Hashed.Darcs -- | darcsDecodeWhite interprets the Darcs-specific "encoded" -- filenames produced by darcsEncodeWhite -- --
--   darcsDecodeWhite "hello\32\there" == "hello there"
--   darcsDecodeWhite "hello\92\there" == "hello\there"
--   darcsDecodeWhite "hello\there"    == error "malformed filename"
--   
darcsDecodeWhite :: String -> FilePath -- | darcsEncodeWhite translates whitespace in filenames to a -- darcs-specific format (backslash followed by numerical representation -- according to ord). Note that backslashes are also escaped since -- they are used in the encoding. -- --
--   darcsEncodeWhite "hello there" == "hello\32\there"
--   darcsEncodeWhite "hello\there" == "hello\92\there"
--   
darcsEncodeWhite :: FilePath -> String darcsEncodeWhiteBS :: ByteString -> ByteString decodeDarcsHash :: ByteString -> Hash decodeDarcsSize :: ByteString -> Maybe Int darcsLocation :: FilePath -> (Maybe Int, Hash) -> FileSegment darcsFormatDir :: Tree m -> Maybe ByteString darcsParseDir :: ByteString -> [(ItemType, Name, Maybe Int, Hash)] -- | Compute a darcs-compatible hash value for a tree-like structure. darcsTreeHash :: Tree m -> Hash darcsUpdateDirHashes :: Tree m -> Tree m darcsUpdateHashes :: (Monad m, Functor m) => Tree m -> m (Tree m) -- | Read and parse a darcs-style hashed directory listing from a given -- dir and with a given hash. readDarcsHashedDir :: FilePath -> (Maybe Int, Hash) -> IO [(ItemType, Name, Maybe Int, Hash)] -- | Read in a darcs-style hashed tree. This is mainly useful for reading -- "pristine.hashed". You need to provide the root hash you are -- interested in (found in _darcs/hashed_inventory). readDarcsHashed :: FilePath -> (Maybe Int, Hash) -> IO (Tree IO) -- | Write a Tree into a darcs-style hashed directory. writeDarcsHashed :: Tree IO -> FilePath -> IO Hash -- | Create a hashed file from a FilePath and content. In case the -- file exists it is kept untouched and is assumed to have the right -- content. XXX Corrupt files should be probably renamed out of the way -- automatically or something (probably when they are being read though). fsCreateHashedFile :: FilePath -> ByteString -> TreeIO () -- | Run a TreeIO action in a hashed setting. The -- initial tree is assumed to be fully available from the -- directory, and any changes will be written out to same. -- Please note that actual filesystem files are never removed. -- -- XXX This somehow manages to leak memory, in some usege scenarios -- (apparently not even all). The only reproducer known so far is -- "gorsvet pull". hashedTreeIO :: TreeIO a -> Tree IO -> FilePath -> IO (a, Tree IO) -- | Read a Tree in the darcs hashed format from an object storage. This is -- basically the same as readDarcsHashed from Storage.Hashed, but uses an -- object storage instead of traditional darcs filesystem layout. -- Requires the tree root hash as a starting point. readPackedDarcsPristine :: OS -> Hash -> IO (Tree IO) -- | Write a Tree into an object storage, using the darcs-style directory -- formatting (and therefore darcs-style hashes). Gives back the object -- storage and the root hash of the stored Tree. NB. The function expects -- that the Tree comes equipped with darcs-style hashes already! writePackedDarcsPristine :: Tree IO -> OS -> IO (OS, Hash) storePackedDarcsPristine :: Tree IO -> OS -> IO (OS, Hash) darcsPristineRefs :: FileSegment -> IO [Hash] module Storage.Hashed readPlainTree :: FilePath -> IO (Tree IO) -- | Read in a darcs-style hashed tree. This is mainly useful for reading -- "pristine.hashed". You need to provide the root hash you are -- interested in (found in _darcs/hashed_inventory). readDarcsHashed :: FilePath -> (Maybe Int, Hash) -> IO (Tree IO) -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use -- with care. readBlob :: Blob m -> m ByteString -- | Write out full tree to a plain directory structure. If you -- instead want to make incremental updates, refer to -- Storage.Hashed.Monad. writePlainTree :: Tree IO -> FilePath -> IO () -- | Write a Tree into a darcs-style hashed directory. writeDarcsHashed :: Tree IO -> FilePath -> IO Hash -- | Take a relative FilePath and turn it into an AnchoredPath. The -- operation is (relatively) unsafe. Basically, by using floatPath, you -- are testifying that the argument is a path relative to some common -- root -- i.e. the root of the associated Tree object. Also, -- there are certain invariants about AnchoredPath that this function -- tries hard to preserve, but probably cannot guarantee (i.e. this is a -- best-effort thing). You should sanitize any FilePaths before you -- declare them good by converting into AnchoredPath (using this -- function). floatPath :: FilePath -> AnchoredPath -- | Take a relative FilePath within a Tree and print the contents of the -- object there. Useful for exploration, less so for serious programming. printPath :: Tree IO -> FilePath -> IO ()