{-# LANGUAGE ScopedTypeVariables, BangPatterns, TypeSynonymInstances, UndecidableInstances #-}

-- | An experimental monadic interface to Tree mutation. The main idea is to
-- simulate IO-ish manipulation of real filesystem (that's the state part of
-- the monad), and to keep memory usage down by reasonably often dumping the
-- intermediate data to disk and forgetting it. The monad interface itself is
-- generic, and a number of actual implementations can be used. This module
-- provides just 'virtualTreeIO' that never writes any changes, but may trigger
-- filesystem reads as appropriate.
module Storage.Hashed.Monad
    ( virtualTreeIO, virtualTreeMonad
    , readFile, writeFile, createDirectory, rename, unlink
    , fileExists, directoryExists, exists, withDirectory
    , currentDirectory
    , tree, TreeState, TreeMonad, TreeIO, runTreeMonad
    , PathSet, initialState, replaceItem
    ) where

import Prelude hiding ( readFile, writeFile )

import Storage.Hashed.AnchoredPath
import Storage.Hashed.Tree
import Storage.Hashed.Hash

import Control.Monad.Error( catchError, MonadError )

import Data.List( inits )
import Data.Int( Int64 )
import Data.Maybe( isNothing, isJust )

import qualified Data.ByteString.Lazy.Char8 as BL
import Control.Monad.RWS.Strict
import qualified Data.Set as S

type PathSet = S.Set AnchoredPath

-- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree
-- content, unsync'd changes and a current working directory (of the monad).
data TreeState m = TreeState { tree :: !(Tree m)
                             , changed :: !PathSet
                             , changesize :: !Int64
                             , sync :: PathSet -> TreeMonad m () }

-- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well,
-- which is a sort of virtual filesystem. Depending on how you obtained your
-- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the
-- actual real filesystem. For 'virtualTreeIO', nothing happens in real
-- filesystem, however with 'plainTreeIO', the plain tree will be updated every
-- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get
-- updated.
type TreeMonad m = RWST AnchoredPath () (TreeState m) m
type TreeIO = TreeMonad IO

class (Functor m, Monad m) => TreeRO m where
    currentDirectory :: m AnchoredPath
    withDirectory :: (MonadError e m) => AnchoredPath -> m a -> m a
    expandTo :: (MonadError e m) => AnchoredPath -> m AnchoredPath
    -- | Grab content of a file in the current Tree at the given path.
    readFile :: (MonadError e m) => AnchoredPath -> m BL.ByteString
    -- | Check for existence of a node (file or directory, doesn't matter).
    exists :: (MonadError e m) => AnchoredPath -> m Bool
    -- | Check for existence of a directory.
    directoryExists :: (MonadError e m) => AnchoredPath -> m Bool
    -- | Check for existence of a file.
    fileExists :: (MonadError e m) => AnchoredPath -> m Bool

class TreeRO m => TreeRW m where
    -- | Change content of a file at a given path. The change will be
    -- eventually flushed to disk, but might be buffered for some time.
    writeFile :: (MonadError e m) => AnchoredPath -> BL.ByteString -> m ()
    createDirectory :: (MonadError e m) => AnchoredPath -> m ()
    unlink :: (MonadError e m) => AnchoredPath -> m ()
    rename :: (MonadError e m) => AnchoredPath -> AnchoredPath -> m ()

initialState :: Tree m -> (PathSet -> TreeMonad m ()) -> TreeState m
initialState t s = TreeState { tree = t
                             , changed = S.empty
                             , changesize = 0
                             , sync = s }

flush :: (Monad m) => TreeMonad m ()
flush = do
  current <- get
  modify $ \st -> st { changed = S.empty, changesize = 0 }
  sync current (changed current)

runTreeMonad :: (Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m)
runTreeMonad action initial = do
  let action' = do x <- action
                   flush
                   return x
  (out, final, _) <- runRWST action' (AnchoredPath []) initial
  return (out, tree final)

-- | Run a TreeIO action without storing any changes. This is useful for
-- running monadic tree mutations for obtaining the resulting Tree (as opposed
-- to their effect of writing a modified tree to disk). The actions can do both
-- read and write -- reads are passed through to the actual filesystem, but the
-- writes are held in memory in a form of modified Tree.
virtualTreeMonad :: (Monad m) => TreeMonad m a -> Tree m -> m (a, Tree m)
virtualTreeMonad action t = runTreeMonad action $ initialState t (\_ -> return ())

virtualTreeIO :: TreeIO a -> Tree IO -> IO (a, Tree IO)
virtualTreeIO = virtualTreeMonad

-- | Modifies an item in the current Tree. This action keeps an account of the
-- modified data, in changed and changesize, for subsequent flush
-- operations. Any modifications (as in "modifyTree") are allowed.
modifyItem :: (MonadError e m, Functor m, Monad m)
            => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m ()
modifyItem path item = do
  path' <- (`catPaths` path) `fmap` currentDirectory
  let paths = let (AnchoredPath x) = path'
              in S.fromList $ map AnchoredPath $ inits x
  change <- changedSize path' item
  modify $ \st -> st { tree = modifyTree (tree st) path' item
                     , changed = (S.union paths (changed st))
                     , changesize = (changesize st + change) }

-- | Replace an item with a new version without modifying the content of the
-- tree. This does not do any change tracking. Ought to be only used from a
-- 'sync' implementation for a particular storage format. The presumed use-case
-- is that an existing in-memory Blob is replaced with a one referring to an
-- on-disk file.
replaceItem :: (MonadError e m, Functor m, Monad m)
            => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m ()
replaceItem path item = do
  path' <- (`catPaths` path) `fmap` currentDirectory
  modify $ \st -> st { tree = modifyTree (tree st) path' item }

changedSize :: (MonadError e m, Functor m, Monad m)
            => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m Int64
changedSize path item = do
    x <- get
    let ch = S.member path (changed x)
        size (Just (File b)) = lift (BL.length `fmap` readBlob b)
        size _ = return 0
    oldsize <- size $ find (tree x) path
    newsize <- size item
    return $! (if ch then newsize - oldsize else newsize)

-- | If buffers are becoming large, sync, otherwise do nothing.
maybeFlush :: (Monad m) => TreeMonad m ()
maybeFlush = do x <- gets changesize
                when (x > 100 * 1024 * 1024) $ flush

instance (Monad m, MonadError e m) => TreeRO (TreeMonad m) where
    expandTo p =
        do t <- gets tree
           p' <- (`catPaths` p) `fmap` ask
           let amend = do t' <- lift $ expandPath t p'
                          modify $ \st -> st { tree = t' }
           case find t p' of
             Nothing -> amend
             Just (Stub _ _) -> amend
             _ -> return ()
           return p'

    fileExists p =
        do p' <- expandTo p
           (isJust . (flip findFile p')) `fmap` gets tree

    directoryExists p =
        do p' <- expandTo p
           (isJust . (flip findTree p')) `fmap` gets tree

    exists p =
        do p' <- expandTo p
           (isJust . (flip find p')) `fmap` gets tree

    readFile p =
        do p' <- expandTo p
           t <- gets tree
           let f = findFile t p'
           case f of
             Nothing -> fail $ "No such file " ++ show p'
             Just x -> lift (readBlob x)

    currentDirectory = ask
    withDirectory dir act = do
      dir' <- expandTo dir
      local (\old -> dir') act

instance (Functor m, Monad m, MonadError e m) => TreeRW (TreeMonad m) where
    writeFile p con =
        do expandTo p
           modifyItem p (Just blob)
           maybeFlush
        where blob = File $ Blob (return con) hash
              hash = NoHash -- we would like to say "sha256 con" here, but due
                            -- to strictness of Hash in Blob, this would often
                            -- lead to unnecessary computation which would then
                            -- be discarded anyway; we rely on the sync
                            -- implementation to fix up any NoHash occurrences

    createDirectory p =
        do expandTo p
           modifyItem p $ Just $ SubTree emptyTree

    unlink p =
        do expandTo p
           modifyItem p Nothing

    rename from to =
        do from' <- expandTo from
           to' <- expandTo to
           tr <- gets tree
           let item = find tr from'
               found_to = find tr to'
           unless (isNothing found_to) $
                  fail $ "Error renaming: destination " ++ show to ++ " exists."
           unless (isNothing item) $ do
                  modifyItem from Nothing
                  modifyItem to item