-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Hashed file storage support code.
--   
--   Support code for reading and manipulating hashed file storage (where
--   each file and directory is associated with a cryptographic hash, for
--   corruption-resistant storage and fast comparisons).
--   
--   The supported storage formats include darcs hashed pristine, a plain
--   filesystem tree and an indexed plain tree (where the index maintains
--   hashes of the plain files and directories).
@package hashed-storage
@version 0.3.8


-- | This module implements relative paths within a Tree. All paths are
--   anchored at a certain root (this is usually the Tree root). They are
--   represented by a list of Names (these are just strict bytestrings).
module Storage.Hashed.AnchoredPath
newtype Name
Name :: ByteString -> Name
newtype AnchoredPath
AnchoredPath :: [Name] -> AnchoredPath

-- | Append an element to the end of a path.
appendPath :: AnchoredPath -> Name -> AnchoredPath

-- | Take a <a>root</a> directory and an anchored path and produce a full
--   <a>FilePath</a>. Moreover, you can use <tt>anchorPath ""</tt> to get a
--   relative <a>FilePath</a>.
anchorPath :: FilePath -> AnchoredPath -> FilePath

-- | Check whether a path is a prefix of another path.
isPrefix :: AnchoredPath -> AnchoredPath -> Bool

-- | Get parent (path) of a given path. foo<i>bar</i>baz -&gt; foo/bar
parent :: AnchoredPath -> AnchoredPath

-- | List all parents of a given path. foo<i>bar</i>baz -&gt; [foo,
--   foo/bar]
parents :: AnchoredPath -> [AnchoredPath]

-- | Catenate two paths together. Not very safe, but sometimes useful (e.g.
--   when you are representing paths relative to a different point than a
--   Tree root).
catPaths :: AnchoredPath -> AnchoredPath -> AnchoredPath
flatten :: AnchoredPath -> ByteString
makeName :: String -> Name

-- | Unsafe.
nameToFilePath :: Name -> FilePath

-- | Unsafe.
nameFromFilePath :: FilePath -> Name
floatBS :: ByteString -> AnchoredPath

-- | Take a relative FilePath and turn it into an AnchoredPath. The
--   operation is unsafe and if you break it, you keep both pieces. More
--   useful for exploratory purposes (ghci) than for serious programming.
floatPath :: FilePath -> AnchoredPath
instance Eq AnchoredPath
instance Show AnchoredPath
instance Ord AnchoredPath
instance Eq Name
instance Show Name
instance Ord Name


-- | The abstract representation of a Tree and useful abstract utilities to
--   handle those.
module Storage.Hashed.Tree

-- | Abstraction of a filesystem tree. Please note that the Tree returned
--   by the respective read operations will have TreeStub items in it. To
--   obtain a Tree without such stubs, call expand on it, eg.:
--   
--   <pre>
--   tree &lt;- readDarcsPristine "." &gt;&gt;= expand
--   </pre>
--   
--   When a Tree is expanded, it becomes <a>final</a>. All stubs are forced
--   and the Tree can be traversed purely. Access to actual file contents
--   stays in IO though.
--   
--   A Tree may have a Hash associated with it. A pair of Tree's is
--   identical whenever their hashes are (the reverse need not hold, since
--   not all Trees come equipped with a hash).
data Tree
data Blob
Blob :: !IO ByteString -> !Maybe Hash -> Blob
data TreeItem
File :: !Blob -> TreeItem
SubTree :: !Tree -> TreeItem
Stub :: !IO Tree -> !Maybe Hash -> TreeItem
data ItemType
BlobType :: ItemType
TreeType :: ItemType
newtype Hash
Hash :: (Maybe Int64, ByteString) -> Hash
makeTree :: [(Name, TreeItem)] -> Tree
makeTreeWithHash :: [(Name, TreeItem)] -> Hash -> Tree
emptyTree :: Tree
emptyBlob :: Blob

-- | Unfold a stubbed Tree into a one with no stubs in it. You might want
--   to filter the tree before expanding to save IO.
expand :: Tree -> IO Tree

-- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path
--   is reachable without crossing any stubs.
expandPath :: Tree -> AnchoredPath -> IO Tree
items :: Tree -> (Map Name TreeItem)

-- | List all contents of a <a>Tree</a>.
list :: Tree -> [(AnchoredPath, TreeItem)]
listImmediate :: Tree -> [(Name, TreeItem)]

-- | Get hash of a Tree. This is guaranteed to uniquely identify the Tree
--   (including any blob content), as far as cryptographic hashes are
--   concerned. Sha256 is recommended.
treeHash :: Tree -> (Maybe Hash)

-- | Look up a <a>Tree</a> item (an immediate subtree or blob).
lookup :: Tree -> Name -> Maybe TreeItem

-- | Find a <a>TreeItem</a> by its path. Gives <a>Nothing</a> if the path
--   is invalid.
find :: Tree -> AnchoredPath -> Maybe TreeItem

-- | Find a <a>Blob</a> by its path. Gives <a>Nothing</a> if the path is
--   invalid, or does not point to a Blob.
findFile :: Tree -> AnchoredPath -> Maybe Blob

-- | Find a <a>Tree</a> by its path. Gives <a>Nothing</a> if the path is
--   invalid, or does not point to a Tree.
findTree :: Tree -> AnchoredPath -> Maybe Tree

-- | Get a hash of a TreeItem. May be Nothing.
itemHash :: TreeItem -> Maybe Hash
itemType :: TreeItem -> ItemType

-- | For every pair of corresponding blobs from the two supplied trees,
--   evaluate the supplied function and accumulate the results in a list.
--   Hint: to get IO actions through, just use sequence on the resulting
--   list. NB. This won't expand any stubs.
zipCommonFiles :: (AnchoredPath -> Blob -> Blob -> a) -> Tree -> Tree -> [a]

-- | For each file in each of the two supplied trees, evaluate the supplied
--   function (supplying the corresponding file from the other tree, or
--   Nothing) and accumulate the results in a list. Hint: to get IO actions
--   through, just use sequence on the resulting list. NB. This won't
--   expand any stubs.
zipFiles :: (AnchoredPath -> Maybe Blob -> Maybe Blob -> a) -> Tree -> Tree -> [a]
zipTrees :: (AnchoredPath -> Maybe TreeItem -> Maybe TreeItem -> a) -> Tree -> Tree -> [a]

-- | Cautiously extracts differing subtrees from a pair of Trees. It will
--   never do any unneccessary expanding. Tree hashes are used to cut the
--   comparison as high up the Tree branches as possible. The result is a
--   pair of trees that do not share any identical subtrees. They are
--   derived from the first and second parameters respectively and they are
--   always fully expanded. It might be advantageous to feed the result
--   into <a>zipFiles</a> or <a>zipTrees</a>.
diffTrees :: Tree -> Tree -> IO (Tree, Tree)

-- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use
--   with care.
read :: Blob -> IO ByteString

-- | When implementing a Tree that has complex expanding semantics, the
--   <a>finish</a> IO action lets you do arbitrary IO transform on the Tree
--   after it is expanded but before it is given to the user by expand.
--   (Used to implement Index updates, eg.)
finish :: Tree -> Tree -> IO Tree

-- | Given a predicate of the form AnchoredPath -&gt; TreeItem -&gt; Bool,
--   and a Tree, produce a Tree that only has items for which the predicate
--   returned True. The tree might contain stubs. When expanded, these will
--   be subject to filtering as well.
filter :: (AnchoredPath -> TreeItem -> Bool) -> Tree -> Tree

-- | Given two Trees, a <tt>guide</tt> and a <tt>tree</tt>, produces a new
--   Tree that is a identical to <tt>tree</tt>, but only has those items
--   that are present in both <tt>tree</tt> and <tt>guide</tt>. The
--   <tt>guide</tt> Tree may not contain any stubs.
restrict :: Tree -> Tree -> Tree
modifyTree :: Tree -> AnchoredPath -> Maybe TreeItem -> Tree
updateTreePostorder :: (Tree -> Tree) -> Tree -> Tree
instance Show ItemType
instance Eq ItemType
instance Show Tree
instance Show TreeItem
instance Show Blob


-- | This module contains plain tree indexing code. The index itself is a
--   CACHE: you should only ever use it as an optimisation and never as a
--   primary storage. In practice, this means that when we change index
--   format, the application is expected to throw the old index away and
--   build a fresh index. Please note that tracking index validity is out
--   of scope for this library: this is responsibility of your application.
--   It is advisable that in your validity tracking code, you also check
--   for format validity (see <a>indexFormatValid</a>) and scrap and
--   re-create index when needed.
--   
--   The index is a binary file that overlays a hashed tree over the
--   working copy. This means that every working file and directory has an
--   entry in the index, that contains its path and hash and validity data.
--   The validity data is a <a>last seen</a> timestamp plus the file size.
--   The file hashes are sha256's of the file's content.
--   
--   There are two entry types, a file entry and a directory entry. Both
--   have a common binary format (see <a>Item</a>). The on-disk format is
--   best described by <a>peekItem</a>.
--   
--   For each file, the index has a copy of the timestamp taken at the
--   instant when the hash has been computed. This means that when file
--   size and timestamp of a file in working copy matches those in the
--   index, we assume that the hash stored in the index for given file is
--   valid. These hashes are then exposed in the resulting <a>Tree</a>
--   object, and can be leveraged by eg. <a>diffTrees</a> to compare many
--   files quickly.
--   
--   You may have noticed that we also keep hashes of directories. These
--   are assumed to be valid whenever the complete subtree has had valid
--   timestamps. At any point, as soon as a size or timestamp mismatch is
--   found, the working file in question is opened, its hash (and timestamp
--   and size) is recomputed and updated in-place in the index file
--   (everything lives at a fixed offset and is fixed size, so this isn't
--   an issue). This is also true of directories: when a file in a
--   directory changes hash, this triggers recomputation of all of its
--   parent directory hashes; moreover this is done efficiently -- each
--   directory is updated at most once during a run.
module Storage.Hashed.Index

-- | Read an index and build up a <a>Tree</a> object from it, referring to
--   current working directory. Any parts of the index that are out of date
--   are updated in-place. The result is always an up-to-date index. Also,
--   the <a>Tree</a> is stubby and only the pieces of the index that are
--   expanded will be actually updated! To implement a subtree query, you
--   can use Tree.filter and then expand the result. Otherwise just expand
--   the whole tree to avoid unexpected problems.
readIndex :: FilePath -> (Tree -> Hash) -> IO Tree

-- | Will add and remove files in index to make it match the <a>Tree</a>
--   object given (it is an error for the <a>Tree</a> to contain a file or
--   directory that does not exist in a plain form in current working
--   directory).
updateIndexFrom :: FilePath -> (Tree -> Hash) -> Tree -> IO Tree

-- | DEPRECATED! Read index (just like readIndex). However, also check that
--   the index version matches our expectations and if not, rebuild it from
--   the reference (which is provided in form of un-executed action; we
--   will only execute it when needed).
readOrUpgradeIndex :: FilePath -> (Tree -> Hash) -> IO Tree -> IO Tree

-- | Check that a given file is an index file with a format we can handle.
--   You should remove and re-create the index whenever this is not true.
indexFormatValid :: FilePath -> IO Bool
instance Show Item


-- | A few darcs-specific utility functions. These are used for reading and
--   writing darcs and darcs-compatible hashed trees.
module Storage.Hashed.Darcs
darcsFormatSize :: (Num a) => a -> ByteString
darcsFormatHash :: Hash -> ByteString

-- | decode_white interprets the Darcs-specific "encoded" filenames
--   produced by <a>darcsEncodeWhite</a>
--   
--   <pre>
--   darcsDecodeWhite "hello\32\there" == "hello there"
--   darcsDecodeWhite "hello\92\there" == "hello\there"
--   darcsDecodeWhite "hello\there"    == error "malformed filename"
--   </pre>
darcsDecodeWhite :: String -> FilePath

-- | encode_white translates whitespace in filenames to a darcs-specific
--   format (backslash followed by numerical representation according to
--   <a>ord</a>). Note that backslashes are also escaped since they are
--   used in the encoding.
--   
--   <pre>
--   darcsEncodeWhite "hello there" == "hello\32\there"
--   darcsEncodeWhite "hello\there" == "hello\92\there"
--   </pre>
darcsEncodeWhite :: FilePath -> String
darcsFormatDir :: Tree -> ByteString
darcsParseDir :: ByteString -> [(ItemType, Name, Hash)]

-- | Compute a darcs-compatible hash value for a tree-like structure.
darcsTreeHash :: Tree -> Hash


-- | An experimental monadic interface to Tree mutation. The main idea is
--   to simulate IO-ish manipulation of real filesystem (that's the state
--   part of the monad), and to keep memory usage down by reasonably often
--   dumping the intermediate data to disk and forgetting it. XXX This
--   currently does not work as advertised and the monads leak memory. So
--   far, I'm at a loss why this happens.
module Storage.Hashed.Monad

-- | Run a <a>TreeIO</a> <tt>action</tt> in a hashed setting. The
--   <tt>initial</tt> tree is assumed to be fully available from the
--   <tt>directory</tt>, and any changes will be written out to same.
--   Please note that actual filesystem files are never removed.
--   
--   XXX This somehow manages to leak memory, somewhere.
hashedTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree)

-- | Run a <a>TreeIO</a> action in a plain tree setting. Writes out changes
--   to the plain tree every now and then (after the action is finished,
--   the last tree state is always flushed to disk). XXX Modify the tree
--   with filesystem reading and put it back into st (ie. replace the
--   in-memory Blobs with normal ones, so the memory can be GCd).
plainTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree)

-- | Run a TreeIO action without dumping anything to disk. Useful for
--   running tree mutations just for the purpose of getting the resulting
--   Tree and throwing it away.
virtualTreeIO :: TreeIO a -> Tree -> IO (a, Tree)

-- | Grab content of a file in the current Tree at the given path.
readFile :: AnchoredPath -> TreeIO ByteString

-- | Change content of a file at a given path. The change will be
--   eventually flushed to disk, but might be buffered for some time.
writeFile :: AnchoredPath -> ByteString -> TreeIO ()
createDirectory :: AnchoredPath -> TreeIO ()
rename :: AnchoredPath -> AnchoredPath -> TreeIO ()
unlink :: AnchoredPath -> TreeIO ()

-- | Check for existence of a file.
fileExists :: AnchoredPath -> TreeIO Bool

-- | Check for existence of a directory.
directoryExists :: AnchoredPath -> TreeIO Bool

-- | Check for existence of a node (file or directory, doesn't matter).
exists :: AnchoredPath -> TreeIO Bool
tree :: TreeState -> Tree
cwd :: TreeState -> AnchoredPath

-- | Internal state of the <a>TreeIO</a> monad. Keeps track of the current
--   Tree content, unsync'd changes and a current working directory (of the
--   monad).
data TreeState

-- | A <a>TreeIO</a> monad. A sort of like IO but it keeps a
--   <a>TreeState</a> around as well, which is a sort of virtual
--   filesystem. Depending on how you obtained your <a>TreeIO</a>, the
--   actions in your virtual filesystem get somehow reflected in the actual
--   real filesystem. For <a>virtualTreeIO</a>, nothing happens in real
--   filesystem, however with <a>plainTreeIO</a>, the plain tree will be
--   updated every now and then, and with <a>hashedTreeIO</a> a darcs-style
--   hashed tree will get updated.
type TreeIO = StateT TreeState IO

module Storage.Hashed.Packed

-- | On-disk format for object storage: we implement a completely loose
--   format (one file per object), a compact format stored in a single
--   append-only file and an immutable <a>pack</a> format.
data Format
Loose :: Format
Compact :: Format
Pack :: Format
loosePath :: OS -> Hash -> FilePath
looseLookup :: OS -> Hash -> IO (Maybe FileSegment)

-- | Object storage block. When used as a hatchery, the loose or compact
--   format are preferable, while for mature space, the pack format is more
--   useful.
data Block
Block :: (Hash -> IO (Maybe FileSegment)) -> Int -> Format -> Block
blockLookup :: Block -> Hash -> IO (Maybe FileSegment)
size :: Block -> Int
format :: Block -> Format

-- | Object storage. Contains a single <a>hatchery</a> and possibly a
--   number of mature space blocks, usually in form of packs. It also keeps
--   a list of root pointers and has a way to extract pointers from objects
--   (externally supplied). These last two things are used to implement a
--   simple GC.
data OS
OS :: Block -> [Block] -> [Hash] -> (FileSegment -> IO [Hash]) -> FilePath -> OS
hatchery :: OS -> Block
mature :: OS -> [Block]
roots :: OS -> [Hash]
references :: OS -> FileSegment -> IO [Hash]
rootdir :: OS -> FilePath

-- | Reduce number of packs in the object storage. This may both recombine
--   packs to eliminate dead objects and join some packs to form bigger
--   packs. The set of hashes given is used as roots for GC marking.
repack :: OS -> Set Hash -> IO OS

-- | Add new objects to the object storage (i.e. put them into hatchery).
--   It is safe to call this even on objects that are already present in
--   the storage: such objects will be skipped.
hatch :: OS -> [ByteString] -> IO OS

-- | Reduce hatchery size by moving things into packs.
compact :: OS -> IO OS
blocksLookup :: [Block] -> Hash -> IO (Maybe (Hash, FileSegment))
lookup :: OS -> Hash -> IO (Maybe FileSegment)

-- | Create an empty object storage in given directory, with a hatchery of
--   given format. The directory is created if needed, but is assumed to be
--   empty.
create :: FilePath -> Format -> IO OS
readOS :: FilePath -> IO OS
createPack :: OS -> [(Hash, FileSegment)] -> IO Block

-- | Build a map of live objects (i.e. those reachable from the given
--   roots) in a given list of Blocks.
live :: OS -> [Block] -> IO (Map Hash FileSegment)

-- | Read a Tree in the darcs hashed format from an object storage. This is
--   basically the same as readDarcsHashed from Storage.Hashed, but uses an
--   object storage instead of traditional darcs filesystem layout.
--   Requires the tree root hash as a starting point.
readPackedDarcsPristine :: OS -> Hash -> IO Tree

-- | Write a Tree into an object storage, using the darcs-style directory
--   formatting (and therefore darcs-style hashes). Gives back the object
--   storage and the root hash of the stored Tree. NB. The function expects
--   that the Tree comes equipped with darcs-style hashes already!
writePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash)
storePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash)
darcsPristineRefs :: FileSegment -> IO [Hash]
instance Show Format
instance Eq Format

module Storage.Hashed

-- | Read in a plain directory hierarchy from a filesystem. NB. The
--   <a>read</a> function on Blobs with such a Tree is susceptible to file
--   content changes. Since we use mmap in <a>read</a>, this will break
--   referential transparency and produce unexpected results. Please always
--   make sure that all parallel access to the underlying filesystem tree
--   never mutates files. Unlink + recreate is fine though (in other words,
--   the sync/write operations below are safe).
readPlainTree :: FilePath -> IO Tree

-- | Read in a darcs-style hashed tree. This is mainly useful for reading
--   "pristine.hashed". You need to provide the root hash you are
--   interested in (found in _darcs/hashed_inventory).
readDarcsHashed :: FilePath -> Hash -> IO Tree

-- | Read in a darcs pristine tree. Handles the plain and hashed pristine
--   cases. Does not (and will not) handle the no-pristine case, since that
--   requires replaying patches. Cf. <a>readDarcsHashed</a> and
--   <a>readPlainTree</a> that are used to do the actual <a>Tree</a>
--   construction.
readDarcsPristine :: FilePath -> IO Tree

-- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use
--   with care.
read :: Blob -> IO ByteString

-- | Read in a FileSegment into a Lazy ByteString. Implemented using mmap.
readSegment :: FileSegment -> IO ByteString

-- | Write out *full* tree to a plain directory structure. If you instead
--   want to make incremental updates, refer to <a>Monad.plainTreeIO</a>.
writePlainTree :: Tree -> FilePath -> IO ()

-- | Take a relative FilePath and turn it into an AnchoredPath. The
--   operation is unsafe and if you break it, you keep both pieces. More
--   useful for exploratory purposes (ghci) than for serious programming.
floatPath :: FilePath -> AnchoredPath

-- | Take a relative FilePath within a Tree and print the contents of the
--   object there. Useful for exploration, less so for serious programming.
printPath :: Tree -> FilePath -> IO ()