-- | The abstract representation of a Tree and useful abstract utilities to
-- handle those.
module Storage.Hashed.Tree
    ( Tree, Blob(..), TreeItem(..), ItemType(..), Hash(..)
    , makeTree, makeTreeWithHash, emptyTree, emptyBlob

    -- * Unfolding stubbed (lazy) Trees.
    --
    -- | By default, Tree obtained by a read function is stubbed: it will
    -- contain Stub items that need to be executed in order to access the
    -- respective subtrees. 'expand' will produce an unstubbed Tree.
    , expand, expandPath

    -- * Tree access and lookup.
    , items, list, listImmediate, treeHash
    , lookup, find, findFile, findTree, itemHash, itemType
    , zipCommonFiles, zipFiles, zipTrees, diffTrees

    -- * Files (Blobs).
    , read

    -- * Manipulating trees.
    , finish, filter, restrict, modifyTree ) where

import Prelude hiding( lookup, filter, read, all )
import Storage.Hashed.AnchoredPath
import Storage.Hashed.Utils( Hash(..) )

import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Map as M

import Data.Maybe( catMaybes )
import Data.List( union, sort )

--------------------------------
-- Tree, Blob and friends
--

data Blob = Blob !(IO BL.ByteString) !(Maybe Hash)
data TreeItem = File !Blob
              | SubTree !Tree
              | Stub !(IO Tree) !(Maybe Hash)

data ItemType = BlobType | TreeType deriving (Show, Eq)

-- | Abstraction of a filesystem tree.
-- Please note that the Tree returned by the respective read operations will
-- have TreeStub items in it. To obtain a Tree without such stubs, call
-- expand on it, eg.:
--
-- > tree <- readDarcsPristine "." >>= expand
--
-- When a Tree is expanded, it becomes "final". All stubs are forced and the
-- Tree can be traversed purely. Access to actual file contents stays in IO
-- though.
--
-- A Tree may have a Hash associated with it. A pair of Tree's is identical
-- whenever their hashes are (the reverse need not hold, since not all Trees
-- come equipped with a hash).
data Tree = Tree { items :: (M.Map Name TreeItem)
                 , listImmediate :: [(Name, TreeItem)]
                 -- | Get hash of a Tree. This is guaranteed to uniquely
                 -- identify the Tree (including any blob content), as far as
                 -- cryptographic hashes are concerned. Sha256 is recommended.
                 , treeHash :: !(Maybe Hash)
                 -- | When implementing a Tree that has complex expanding
                 -- semantics, the "finish" IO action lets you do arbitrary IO
                 -- transform on the Tree after it is expanded but before it is
                 -- given to the user by expand. (Used to implement Index
                 -- updates, eg.)
                 , finish :: Tree -> IO Tree }

-- | Get a hash of a TreeItem. May be Nothing.
itemHash :: TreeItem -> Maybe Hash
itemHash (File (Blob _ h)) = h
itemHash (SubTree t) = treeHash t
itemHash (Stub _ h) = h

itemType :: TreeItem -> ItemType
itemType (File _) = BlobType
itemType (SubTree _) = TreeType
itemType (Stub _ _) = TreeType

emptyTree :: Tree
emptyTree = Tree { items = M.empty
                 , listImmediate = []
                 , treeHash = Nothing
                 , finish = return }

emptyBlob :: Blob
emptyBlob = Blob (return BL.empty) Nothing

makeTree :: [(Name,TreeItem)] -> Tree
makeTree l = Tree { items = M.fromList l
                  , listImmediate = l
                  , treeHash = Nothing
                  , finish = return }

makeTreeWithHash :: [(Name,TreeItem)] -> Hash -> Tree
makeTreeWithHash l h = Tree { items = M.fromList l
                            , listImmediate = l
                            , treeHash = Just h
                            , finish = return }

-----------------------------------
-- Tree access and lookup
--

-- | Look up a 'Tree' item (an immediate subtree or blob).
lookup :: Tree -> Name -> Maybe TreeItem
lookup t n = M.lookup n (items t)

find' :: TreeItem -> AnchoredPath -> Maybe TreeItem
find' t (AnchoredPath []) = Just t
find' (SubTree t) (AnchoredPath (d : rest)) =
    case lookup t d of
      Just sub -> find' sub (AnchoredPath rest)
      Nothing -> Nothing
find' _ _ = Nothing

-- | Find a 'TreeItem' by its path. Gives 'Nothing' if the path is invalid.
find :: Tree -> AnchoredPath -> Maybe TreeItem
find = find' . SubTree

-- | Find a 'Blob' by its path. Gives 'Nothing' if the path is invalid, or does
-- not point to a Blob.
findFile :: Tree -> AnchoredPath -> Maybe Blob
findFile t p = case find t p of
                 Just (File x) -> Just x
                 _ -> Nothing

-- | Find a 'Tree' by its path. Gives 'Nothing' if the path is invalid, or does
-- not point to a Tree.
findTree :: Tree -> AnchoredPath -> Maybe Tree
findTree t p = case find t p of
                 Just (SubTree x) -> Just x
                 _ -> Nothing

-- | List all contents of a 'Tree'.
list :: Tree -> [(AnchoredPath, TreeItem)]
list t_ = paths t_ (AnchoredPath [])
    where paths t p = [ (appendPath p n, i)
                          | (n,i) <- listImmediate t ] ++
                    concat [ paths subt (appendPath p subn)
                             | (subn, SubTree subt) <- listImmediate t ]

-- | Unfold a stubbed Tree into a one with no stubs in it. You might want to
-- filter the tree before expanding to save IO.
expand :: Tree -> IO Tree
expand t = do
  expanded <- mapM subtree [ x | x@(_, item) <- listImmediate t, isSub item ]
  let orig = [ i | i <- listImmediate t, not $ isSub $ snd i ]
      orig_map = M.filter (not . isSub) (items t)
      expanded_map = M.fromList expanded
      tree = t { items = M.union orig_map expanded_map
               , listImmediate = orig ++ expanded }
  finish tree tree
    where subtree (name, sub) = do tree <- expand =<< unstub sub
                                   return (name, SubTree tree)
          unstub (Stub s _) = s
          unstub (SubTree t) = return t
          isSub (File _) = False
          isSub _ = True

-- | Unfold a path in a (stubbed) Tree, such that the leaf node of the path is
-- reachable without crossing any stubs.
expandPath :: Tree -> AnchoredPath -> IO Tree
expandPath t_ path_ = do expand' t_ path_
    where expand' t (AnchoredPath [_]) = return t
          expand' t (AnchoredPath (n:rest)) = do
            case lookup t n of
              (Just (Stub stub _)) ->
                  do unstubbed <- stub
                     amend t n rest unstubbed
              (Just (SubTree t')) -> amend t n rest t'
              _ -> fail $ "Descent error in expandPath: " ++ show path_
          amend t name rest sub = do
            sub' <- expand' sub (AnchoredPath rest)
            let orig_l = [ i | i@(n',_) <- listImmediate t, name /= n' ]
                tree = t { items = M.insert name (SubTree sub') (items t)
                         , listImmediate = (name, SubTree sub') : orig_l }
            return tree

-- | Given two Trees, a @guide@ and a @tree@, produces a new Tree that is a
-- identical to @tree@, but only has those items that are present in both
-- @tree@ and @guide@. The @guide@ Tree may not contain any stubs.
restrict :: Tree -> Tree -> Tree
restrict guide tree = filter accept tree
    where accept path item =
              case (find guide path, item) of
                (Just (SubTree _), SubTree _) -> True
                (Just (SubTree _), Stub _ _) -> True
                (Just (File _), File _) -> True
                (Just (Stub _ _), _) ->
                    error "*sulk* Go away, you, you precondition violator!"
                (_, _) -> False

-- | Given a predicate of the form AnchoredPath -> TreeItem -> Bool, and a
-- Tree, produce a Tree that only has items for which the predicate returned
-- True. The tree might contain stubs. When expanded, these will be subject to
-- filtering as well.
filter :: (AnchoredPath -> TreeItem -> Bool) -> Tree -> Tree
filter predicate t_ = filter' t_ (AnchoredPath [])
    where filter' t path =
              let subs = (catMaybes [ (,) name `fmap` wibble path name item
                                      | (name,item) <- listImmediate t ])
              in t { items = M.mapMaybeWithKey (wibble path) $ items t
                   , listImmediate = subs
                   , treeHash = Nothing }
          wibble path name item =
              let npath = path `appendPath` name in
                  if predicate npath item
                     then Just $ filterSub npath item
                     else Nothing
          filterSub npath (SubTree t) = SubTree $ filter' t npath
          filterSub npath (Stub stub _) =
              Stub (do x <- stub
                       return $ filter' x npath) Nothing
          filterSub _ x = x

-- | Read a Blob into a Lazy ByteString. Might be backed by an mmap, use with
-- care.
read :: Blob -> IO BL.ByteString
read (Blob r _) = r

-- | For every pair of corresponding blobs from the two supplied trees,
-- evaluate the supplied function and accumulate the results in a list. Hint:
-- to get IO actions through, just use sequence on the resulting list.
-- NB. This won't expand any stubs.
zipCommonFiles :: (AnchoredPath -> Blob -> Blob -> a) -> Tree -> Tree -> [a]
zipCommonFiles f a b = catMaybes [ flip (f p) x `fmap` findFile a p
                                   | (p, File x) <- list b ]

-- | For each file in each of the two supplied trees, evaluate the supplied
-- function (supplying the corresponding file from the other tree, or Nothing)
-- and accumulate the results in a list. Hint: to get IO actions through, just
-- use sequence on the resulting list.  NB. This won't expand any stubs.
zipFiles :: (AnchoredPath -> Maybe Blob -> Maybe Blob -> a)
         -> Tree -> Tree -> [a]
zipFiles f a b = [ f p (findFile a p) (findFile b p)
                   | p <- paths a `union` paths b ]
    where paths t = sort [ p | (p, File _) <- list t ]

zipTrees :: (AnchoredPath -> Maybe TreeItem -> Maybe TreeItem -> a)
         -> Tree -> Tree -> [a]
zipTrees f a b = [ f p (find a p) (find b p)
                   | p <- reverse (paths a `union` paths b) ]
    where paths t = sort [ p | (p, _) <- list t ]

-- | Cautiously extracts differing subtrees from a pair of Trees. It will never
-- do any unneccessary expanding. Tree hashes are used to cut the comparison as
-- high up the Tree branches as possible. The result is a pair of trees that do
-- not share any identical subtrees. They are derived from the first and second
-- parameters respectively and they are always fully expanded. It might be
-- advantageous to feed the result into 'zipFiles'.
diffTrees :: Tree -> Tree -> IO (Tree, Tree)
diffTrees left right =
            if treeHash left `match` treeHash right
               then return (emptyTree, emptyTree)
               else diff left right
  where match (Just h) (Just j) | h == j = True
        match _ _ = False
        isFile (File _) = True
        isFile _ = False
        notFile = not . isFile
        subtree :: TreeItem -> IO Tree
        subtree (Stub x _) = x
        subtree (SubTree x) = return x
        subtree (File _) = error "diffTrees tried to descend a File as a subtree"
        maybeUnfold (Stub x _) = SubTree `fmap` (x >>= expand)
        maybeUnfold (SubTree x) = SubTree `fmap` expand x
        maybeUnfold i = return i
        immediateN t = [ n | (n, _) <- listImmediate t ]
        diff left' right' = do
          is <- sequence [
                   case (lookup left' n, lookup right' n) of
                     (Just l, Nothing) -> do
                       l' <- maybeUnfold l
                       return (n, Just l', Nothing)
                     (Nothing, Just r) -> do
                       r' <- maybeUnfold r
                       return (n, Nothing, Just r')
                     (Just l, Just r)
                         | itemHash l `match` itemHash r ->
                             return (n, Nothing, Nothing)
                         | notFile l && notFile r ->
                             do x <- subtree l
                                y <- subtree r
                                (x', y') <- diffTrees x y
                                return (n, Just $ SubTree x', Just $ SubTree y')
                         | isFile l && isFile r ->
                             return (n, Just l, Just r)
                         | otherwise -> do l' <- maybeUnfold l
                                           r' <- maybeUnfold r
                                           return (n, Just l', Just r')
                     _ -> error "n lookups failed"
                   | n <- immediateN left' `union` immediateN right' ]
          let is_l = [ (n, l) | (n, Just l ,_) <- is ]
              is_r = [ (n, r) | (n, _, Just r) <- is ]
          return (makeTree is_l, makeTree is_r)

modifyTree :: Tree -> AnchoredPath -> Maybe TreeItem -> Tree

modifyTree _ (AnchoredPath []) (Just (SubTree sub)) = sub

modifyTree t (AnchoredPath [n]) (Just item) =
    t { items = M.insert n item (items t)
      , listImmediate = (n,item) : subs
      , treeHash = Nothing }
  where subs = [ x | x@(n', _) <- listImmediate t, n /= n' ]

modifyTree t (AnchoredPath [n]) Nothing =
    t { items = M.delete n (items t)
      , listImmediate = subs
      , treeHash = Nothing }
  where subs = [ x | x@(n', _) <- listImmediate t, n /= n' ]

modifyTree t path@(AnchoredPath (n:r)) item =
    t { items = M.insert n sub (items t)
      , listImmediate = (n,sub) : subs
      , treeHash = Nothing }
  where subs = [ x | x@(n', _) <- listImmediate t, n /= n' ]
        modSubtree s = modifyTree s (AnchoredPath r) item
        sub = case lookup t n of
                Just (SubTree s) -> SubTree $ modSubtree s
                Just (Stub s _) -> Stub (do x <- s
                                            return $ modSubtree x) Nothing
                Nothing -> SubTree $ modSubtree emptyTree
                _ -> error $ "Modify tree at " ++ show path

modifyTree _ (AnchoredPath []) (Just (Stub _ _)) =
    error "Bug in descent in modifyTree."
modifyTree _ (AnchoredPath []) (Just (File _)) =
    error "Bug in descent in modifyTree."
modifyTree _ (AnchoredPath []) Nothing =
    error "Bug in descent in modifyTree."