-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Hashed file storage support code. -- -- Support code for reading and manipulating hashed file storage (where -- each file and directory is associated with a cryptographic hash, for -- corruption-resistant storage and fast comparisons). -- -- The supported storage formats include darcs hashed pristine, a plain -- filesystem tree and an indexed plain tree (where the index maintains -- hashes of the plain files and directories). @package hashed-storage @version 0.3.8 -- | This module implements relative paths within a Tree. All paths are -- anchored at a certain root (this is usually the Tree root). They are -- represented by a list of Names (these are just strict bytestrings). module Storage.Hashed.AnchoredPath newtype Name Name :: ByteString -> Name newtype AnchoredPath AnchoredPath :: [Name] -> AnchoredPath -- | Append an element to the end of a path. appendPath :: AnchoredPath -> Name -> AnchoredPath -- | Take a root directory and an anchored path and produce a full -- FilePath. Moreover, you can use anchorPath "" to get a -- relative FilePath. anchorPath :: FilePath -> AnchoredPath -> FilePath -- | Check whether a path is a prefix of another path. isPrefix :: AnchoredPath -> AnchoredPath -> Bool -- | Get parent (path) of a given path. foobarbaz -> foo/bar parent :: AnchoredPath -> AnchoredPath -- | List all parents of a given path. foobarbaz -> [foo, -- foo/bar] parents :: AnchoredPath -> [AnchoredPath] -- | Catenate two paths together. Not very safe, but sometimes useful (e.g. -- when you are representing paths relative to a different point than a -- Tree root). catPaths :: AnchoredPath -> AnchoredPath -> AnchoredPath flatten :: AnchoredPath -> ByteString makeName :: String -> Name -- | Unsafe. nameToFilePath :: Name -> FilePath -- | Unsafe. nameFromFilePath :: FilePath -> Name floatBS :: ByteString -> AnchoredPath -- | Take a relative FilePath and turn it into an AnchoredPath. The -- operation is unsafe and if you break it, you keep both pieces. More -- useful for exploratory purposes (ghci) than for serious programming. floatPath :: FilePath -> AnchoredPath instance Eq AnchoredPath instance Show AnchoredPath instance Ord AnchoredPath instance Eq Name instance Show Name instance Ord Name -- | The abstract representation of a Tree and useful abstract utilities to -- handle those. module Storage.Hashed.Tree -- | Abstraction of a filesystem tree. Please note that the Tree returned -- by the respective read operations will have TreeStub items in it. To -- obtain a Tree without such stubs, call expand on it, eg.: -- --
--   tree <- readDarcsPristine "." >>= expand
--   
-- -- When a Tree is expanded, it becomes final. All stubs are forced -- and the Tree can be traversed purely. Access to actual file contents -- stays in IO though. -- -- A Tree may have a Hash associated with it. A pair of Tree's is -- identical whenever their hashes are (the reverse need not hold, since -- not all Trees come equipped with a hash). data Tree data Blob Blob :: !IO ByteString -> !Maybe Hash -> Blob data TreeItem File :: !Blob -> TreeItem SubTree :: !Tree -> TreeItem Stub :: !IO Tree -> !Maybe Hash -> TreeItem data ItemType BlobType :: ItemType TreeType :: ItemType newtype Hash Hash :: (Maybe Int64, ByteString) -> Hash makeTree :: [(Name, TreeItem)] -> Tree makeTreeWithHash :: [(Name, TreeItem)] -> Hash -> Tree emptyTree :: Tree emptyBlob :: Blob -- | Unfold a stubbed Tree into a one with no stubs in it. You might want -- to filter the tree before expanding to save IO. expand :: Tree -> IO Tree -- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path -- is reachable without crossing any stubs. expandPath :: Tree -> AnchoredPath -> IO Tree items :: Tree -> (Map Name TreeItem) -- | List all contents of a Tree. list :: Tree -> [(AnchoredPath, TreeItem)] listImmediate :: Tree -> [(Name, TreeItem)] -- | Get hash of a Tree. This is guaranteed to uniquely identify the Tree -- (including any blob content), as far as cryptographic hashes are -- concerned. Sha256 is recommended. treeHash :: Tree -> (Maybe Hash) -- | Look up a Tree item (an immediate subtree or blob). lookup :: Tree -> Name -> Maybe TreeItem -- | Find a TreeItem by its path. Gives Nothing if the path -- is invalid. find :: Tree -> AnchoredPath -> Maybe TreeItem -- | Find a Blob by its path. Gives Nothing if the path is -- invalid, or does not point to a Blob. findFile :: Tree -> AnchoredPath -> Maybe Blob -- | Find a Tree by its path. Gives Nothing if the path is -- invalid, or does not point to a Tree. findTree :: Tree -> AnchoredPath -> Maybe Tree -- | Get a hash of a TreeItem. May be Nothing. itemHash :: TreeItem -> Maybe Hash itemType :: TreeItem -> ItemType -- | For every pair of corresponding blobs from the two supplied trees, -- evaluate the supplied function and accumulate the results in a list. -- Hint: to get IO actions through, just use sequence on the resulting -- list. NB. This won't expand any stubs. zipCommonFiles :: (AnchoredPath -> Blob -> Blob -> a) -> Tree -> Tree -> [a] -- | For each file in each of the two supplied trees, evaluate the supplied -- function (supplying the corresponding file from the other tree, or -- Nothing) and accumulate the results in a list. Hint: to get IO actions -- through, just use sequence on the resulting list. NB. This won't -- expand any stubs. zipFiles :: (AnchoredPath -> Maybe Blob -> Maybe Blob -> a) -> Tree -> Tree -> [a] zipTrees :: (AnchoredPath -> Maybe TreeItem -> Maybe TreeItem -> a) -> Tree -> Tree -> [a] -- | Cautiously extracts differing subtrees from a pair of Trees. It will -- never do any unneccessary expanding. Tree hashes are used to cut the -- comparison as high up the Tree branches as possible. The result is a -- pair of trees that do not share any identical subtrees. They are -- derived from the first and second parameters respectively and they are -- always fully expanded. It might be advantageous to feed the result -- into zipFiles or zipTrees. diffTrees :: Tree -> Tree -> IO (Tree, Tree) -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use -- with care. read :: Blob -> IO ByteString -- | When implementing a Tree that has complex expanding semantics, the -- finish IO action lets you do arbitrary IO transform on the Tree -- after it is expanded but before it is given to the user by expand. -- (Used to implement Index updates, eg.) finish :: Tree -> Tree -> IO Tree -- | Given a predicate of the form AnchoredPath -> TreeItem -> Bool, -- and a Tree, produce a Tree that only has items for which the predicate -- returned True. The tree might contain stubs. When expanded, these will -- be subject to filtering as well. filter :: (AnchoredPath -> TreeItem -> Bool) -> Tree -> Tree -- | Given two Trees, a guide and a tree, produces a new -- Tree that is a identical to tree, but only has those items -- that are present in both tree and guide. The -- guide Tree may not contain any stubs. restrict :: Tree -> Tree -> Tree modifyTree :: Tree -> AnchoredPath -> Maybe TreeItem -> Tree updateTreePostorder :: (Tree -> Tree) -> Tree -> Tree instance Show ItemType instance Eq ItemType instance Show Tree instance Show TreeItem instance Show Blob -- | This module contains plain tree indexing code. The index itself is a -- CACHE: you should only ever use it as an optimisation and never as a -- primary storage. In practice, this means that when we change index -- format, the application is expected to throw the old index away and -- build a fresh index. Please note that tracking index validity is out -- of scope for this library: this is responsibility of your application. -- It is advisable that in your validity tracking code, you also check -- for format validity (see indexFormatValid) and scrap and -- re-create index when needed. -- -- The index is a binary file that overlays a hashed tree over the -- working copy. This means that every working file and directory has an -- entry in the index, that contains its path and hash and validity data. -- The validity data is a last seen timestamp plus the file size. -- The file hashes are sha256's of the file's content. -- -- There are two entry types, a file entry and a directory entry. Both -- have a common binary format (see Item). The on-disk format is -- best described by peekItem. -- -- For each file, the index has a copy of the timestamp taken at the -- instant when the hash has been computed. This means that when file -- size and timestamp of a file in working copy matches those in the -- index, we assume that the hash stored in the index for given file is -- valid. These hashes are then exposed in the resulting Tree -- object, and can be leveraged by eg. diffTrees to compare many -- files quickly. -- -- You may have noticed that we also keep hashes of directories. These -- are assumed to be valid whenever the complete subtree has had valid -- timestamps. At any point, as soon as a size or timestamp mismatch is -- found, the working file in question is opened, its hash (and timestamp -- and size) is recomputed and updated in-place in the index file -- (everything lives at a fixed offset and is fixed size, so this isn't -- an issue). This is also true of directories: when a file in a -- directory changes hash, this triggers recomputation of all of its -- parent directory hashes; moreover this is done efficiently -- each -- directory is updated at most once during a run. module Storage.Hashed.Index -- | Read an index and build up a Tree object from it, referring to -- current working directory. Any parts of the index that are out of date -- are updated in-place. The result is always an up-to-date index. Also, -- the Tree is stubby and only the pieces of the index that are -- expanded will be actually updated! To implement a subtree query, you -- can use Tree.filter and then expand the result. Otherwise just expand -- the whole tree to avoid unexpected problems. readIndex :: FilePath -> (Tree -> Hash) -> IO Tree -- | Will add and remove files in index to make it match the Tree -- object given (it is an error for the Tree to contain a file or -- directory that does not exist in a plain form in current working -- directory). updateIndexFrom :: FilePath -> (Tree -> Hash) -> Tree -> IO Tree -- | DEPRECATED! Read index (just like readIndex). However, also check that -- the index version matches our expectations and if not, rebuild it from -- the reference (which is provided in form of un-executed action; we -- will only execute it when needed). readOrUpgradeIndex :: FilePath -> (Tree -> Hash) -> IO Tree -> IO Tree -- | Check that a given file is an index file with a format we can handle. -- You should remove and re-create the index whenever this is not true. indexFormatValid :: FilePath -> IO Bool instance Show Item -- | A few darcs-specific utility functions. These are used for reading and -- writing darcs and darcs-compatible hashed trees. module Storage.Hashed.Darcs darcsFormatSize :: (Num a) => a -> ByteString darcsFormatHash :: Hash -> ByteString -- | decode_white interprets the Darcs-specific "encoded" filenames -- produced by darcsEncodeWhite -- --
--   darcsDecodeWhite "hello\32\there" == "hello there"
--   darcsDecodeWhite "hello\92\there" == "hello\there"
--   darcsDecodeWhite "hello\there"    == error "malformed filename"
--   
darcsDecodeWhite :: String -> FilePath -- | encode_white translates whitespace in filenames to a darcs-specific -- format (backslash followed by numerical representation according to -- ord). Note that backslashes are also escaped since they are -- used in the encoding. -- --
--   darcsEncodeWhite "hello there" == "hello\32\there"
--   darcsEncodeWhite "hello\there" == "hello\92\there"
--   
darcsEncodeWhite :: FilePath -> String darcsFormatDir :: Tree -> ByteString darcsParseDir :: ByteString -> [(ItemType, Name, Hash)] -- | Compute a darcs-compatible hash value for a tree-like structure. darcsTreeHash :: Tree -> Hash -- | An experimental monadic interface to Tree mutation. The main idea is -- to simulate IO-ish manipulation of real filesystem (that's the state -- part of the monad), and to keep memory usage down by reasonably often -- dumping the intermediate data to disk and forgetting it. XXX This -- currently does not work as advertised and the monads leak memory. So -- far, I'm at a loss why this happens. module Storage.Hashed.Monad -- | Run a TreeIO action in a hashed setting. The -- initial tree is assumed to be fully available from the -- directory, and any changes will be written out to same. -- Please note that actual filesystem files are never removed. -- -- XXX This somehow manages to leak memory, somewhere. hashedTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree) -- | Run a TreeIO action in a plain tree setting. Writes out changes -- to the plain tree every now and then (after the action is finished, -- the last tree state is always flushed to disk). XXX Modify the tree -- with filesystem reading and put it back into st (ie. replace the -- in-memory Blobs with normal ones, so the memory can be GCd). plainTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree) -- | Run a TreeIO action without dumping anything to disk. Useful for -- running tree mutations just for the purpose of getting the resulting -- Tree and throwing it away. virtualTreeIO :: TreeIO a -> Tree -> IO (a, Tree) -- | Grab content of a file in the current Tree at the given path. readFile :: AnchoredPath -> TreeIO ByteString -- | Change content of a file at a given path. The change will be -- eventually flushed to disk, but might be buffered for some time. writeFile :: AnchoredPath -> ByteString -> TreeIO () createDirectory :: AnchoredPath -> TreeIO () rename :: AnchoredPath -> AnchoredPath -> TreeIO () unlink :: AnchoredPath -> TreeIO () -- | Check for existence of a file. fileExists :: AnchoredPath -> TreeIO Bool -- | Check for existence of a directory. directoryExists :: AnchoredPath -> TreeIO Bool -- | Check for existence of a node (file or directory, doesn't matter). exists :: AnchoredPath -> TreeIO Bool tree :: TreeState -> Tree cwd :: TreeState -> AnchoredPath -- | Internal state of the TreeIO monad. Keeps track of the current -- Tree content, unsync'd changes and a current working directory (of the -- monad). data TreeState -- | A TreeIO monad. A sort of like IO but it keeps a -- TreeState around as well, which is a sort of virtual -- filesystem. Depending on how you obtained your TreeIO, the -- actions in your virtual filesystem get somehow reflected in the actual -- real filesystem. For virtualTreeIO, nothing happens in real -- filesystem, however with plainTreeIO, the plain tree will be -- updated every now and then, and with hashedTreeIO a darcs-style -- hashed tree will get updated. type TreeIO = StateT TreeState IO module Storage.Hashed.Packed -- | On-disk format for object storage: we implement a completely loose -- format (one file per object), a compact format stored in a single -- append-only file and an immutable pack format. data Format Loose :: Format Compact :: Format Pack :: Format loosePath :: OS -> Hash -> FilePath looseLookup :: OS -> Hash -> IO (Maybe FileSegment) -- | Object storage block. When used as a hatchery, the loose or compact -- format are preferable, while for mature space, the pack format is more -- useful. data Block Block :: (Hash -> IO (Maybe FileSegment)) -> Int -> Format -> Block blockLookup :: Block -> Hash -> IO (Maybe FileSegment) size :: Block -> Int format :: Block -> Format -- | Object storage. Contains a single hatchery and possibly a -- number of mature space blocks, usually in form of packs. It also keeps -- a list of root pointers and has a way to extract pointers from objects -- (externally supplied). These last two things are used to implement a -- simple GC. data OS OS :: Block -> [Block] -> [Hash] -> (FileSegment -> IO [Hash]) -> FilePath -> OS hatchery :: OS -> Block mature :: OS -> [Block] roots :: OS -> [Hash] references :: OS -> FileSegment -> IO [Hash] rootdir :: OS -> FilePath -- | Reduce number of packs in the object storage. This may both recombine -- packs to eliminate dead objects and join some packs to form bigger -- packs. The set of hashes given is used as roots for GC marking. repack :: OS -> Set Hash -> IO OS -- | Add new objects to the object storage (i.e. put them into hatchery). -- It is safe to call this even on objects that are already present in -- the storage: such objects will be skipped. hatch :: OS -> [ByteString] -> IO OS -- | Reduce hatchery size by moving things into packs. compact :: OS -> IO OS blocksLookup :: [Block] -> Hash -> IO (Maybe (Hash, FileSegment)) lookup :: OS -> Hash -> IO (Maybe FileSegment) -- | Create an empty object storage in given directory, with a hatchery of -- given format. The directory is created if needed, but is assumed to be -- empty. create :: FilePath -> Format -> IO OS readOS :: FilePath -> IO OS createPack :: OS -> [(Hash, FileSegment)] -> IO Block -- | Build a map of live objects (i.e. those reachable from the given -- roots) in a given list of Blocks. live :: OS -> [Block] -> IO (Map Hash FileSegment) -- | Read a Tree in the darcs hashed format from an object storage. This is -- basically the same as readDarcsHashed from Storage.Hashed, but uses an -- object storage instead of traditional darcs filesystem layout. -- Requires the tree root hash as a starting point. readPackedDarcsPristine :: OS -> Hash -> IO Tree -- | Write a Tree into an object storage, using the darcs-style directory -- formatting (and therefore darcs-style hashes). Gives back the object -- storage and the root hash of the stored Tree. NB. The function expects -- that the Tree comes equipped with darcs-style hashes already! writePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash) storePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash) darcsPristineRefs :: FileSegment -> IO [Hash] instance Show Format instance Eq Format module Storage.Hashed -- | Read in a plain directory hierarchy from a filesystem. NB. The -- read function on Blobs with such a Tree is susceptible to file -- content changes. Since we use mmap in read, this will break -- referential transparency and produce unexpected results. Please always -- make sure that all parallel access to the underlying filesystem tree -- never mutates files. Unlink + recreate is fine though (in other words, -- the sync/write operations below are safe). readPlainTree :: FilePath -> IO Tree -- | Read in a darcs-style hashed tree. This is mainly useful for reading -- "pristine.hashed". You need to provide the root hash you are -- interested in (found in _darcs/hashed_inventory). readDarcsHashed :: FilePath -> Hash -> IO Tree -- | Read in a darcs pristine tree. Handles the plain and hashed pristine -- cases. Does not (and will not) handle the no-pristine case, since that -- requires replaying patches. Cf. readDarcsHashed and -- readPlainTree that are used to do the actual Tree -- construction. readDarcsPristine :: FilePath -> IO Tree -- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use -- with care. read :: Blob -> IO ByteString -- | Read in a FileSegment into a Lazy ByteString. Implemented using mmap. readSegment :: FileSegment -> IO ByteString -- | Write out *full* tree to a plain directory structure. If you instead -- want to make incremental updates, refer to Monad.plainTreeIO. writePlainTree :: Tree -> FilePath -> IO () -- | Take a relative FilePath and turn it into an AnchoredPath. The -- operation is unsafe and if you break it, you keep both pieces. More -- useful for exploratory purposes (ghci) than for serious programming. floatPath :: FilePath -> AnchoredPath -- | Take a relative FilePath within a Tree and print the contents of the -- object there. Useful for exploration, less so for serious programming. printPath :: Tree -> FilePath -> IO ()