{-# LANGUAGE ScopedTypeVariables, BangPatterns, TypeSynonymInstances, UndecidableInstances #-}

-- | An experimental monadic interface to Tree mutation. The main idea is to
-- simulate IO-ish manipulation of real filesystem (that's the state part of
-- the monad), and to keep memory usage down by reasonably often dumping the
-- intermediate data to disk and forgetting it. The monad interface itself is
-- generic, and a number of actual implementations can be used. This module
-- provides just 'virtualTreeIO' that never writes any changes, but may trigger
-- filesystem reads as appropriate.
--
-- XXX This currently does not work as advertised and the monads leak
-- memory. So far, I'm at a loss why this happens.
module Storage.Hashed.Monad
    ( virtualTreeIO, virtualTreeMonad
    , readFile, writeFile, createDirectory, rename, unlink
    , fileExists, directoryExists, exists, withDirectory
    , tree, TreeState, TreeMonad, TreeIO, runTreeMonad
    , PathSet, initialState, replaceItem
    ) where

import Prelude hiding ( readFile, writeFile )

import Storage.Hashed.AnchoredPath
import Storage.Hashed.Tree
import Storage.Hashed.Hash

import Control.Monad.Error( catchError, MonadError )

import Data.List( inits )
import Data.Int( Int64 )
import Data.Maybe( isNothing, isJust )

import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8( )
import Control.Monad.RWS.Strict
import qualified Data.Set as S

type PathSet = S.Set AnchoredPath

-- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree
-- content, unsync'd changes and a current working directory (of the monad).
data TreeState m = TreeState { tree :: Tree m
                             , changed :: PathSet
                             , changesize :: Int64
                             , sync :: PathSet -> TreeMonad m () }

-- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well,
-- which is a sort of virtual filesystem. Depending on how you obtained your
-- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the
-- actual real filesystem. For 'virtualTreeIO', nothing happens in real
-- filesystem, however with 'plainTreeIO', the plain tree will be updated every
-- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get
-- updated.
type TreeMonad m = RWST AnchoredPath () (TreeState m) m
type TreeIO = TreeMonad IO

class (Functor m, Monad m) => TreeRO m where
    currentDirectory :: m AnchoredPath
    withDirectory :: (MonadError e m) => AnchoredPath -> m a -> m a
    expandTo :: (MonadError e m) => AnchoredPath -> m ()
    -- | Grab content of a file in the current Tree at the given path.
    readFile :: (MonadError e m) => AnchoredPath -> m BL.ByteString
    -- | Check for existence of a node (file or directory, doesn't matter).
    exists :: (MonadError e m) => AnchoredPath -> m Bool
    -- | Check for existence of a directory.
    directoryExists :: (MonadError e m) => AnchoredPath -> m Bool
    -- | Check for existence of a file.
    fileExists :: (MonadError e m) => AnchoredPath -> m Bool

class TreeRO m => TreeRW m where
    -- | Change content of a file at a given path. The change will be
    -- eventually flushed to disk, but might be buffered for some time.
    writeFile :: (MonadError e m) => AnchoredPath -> BL.ByteString -> m ()
    createDirectory :: (MonadError e m) => AnchoredPath -> m ()
    unlink :: (MonadError e m) => AnchoredPath -> m ()
    rename :: (MonadError e m) => AnchoredPath -> AnchoredPath -> m ()

initialState :: Tree m -> (PathSet -> TreeMonad m ()) -> TreeState m
initialState t s = TreeState { tree = t
                             , changed = S.empty
                             , changesize = 0
                             , sync = s }

flush :: (Monad m) => TreeMonad m ()
flush = do
  current <- get
  modify $ \st -> st { changed = S.empty, changesize = 0 }
  sync current (changed current)

runTreeMonad :: (Monad m) => TreeMonad m a -> TreeState m -> m (a, Tree m)
runTreeMonad action initial = do
  let action' = do x <- action
                   flush
                   return x
  (out, final, _) <- runRWST action' (AnchoredPath []) initial
  return (out, tree final)

-- | Run a TreeIO action without storing any changes. This is useful for
-- running monadic tree mutations for obtaining the resulting Tree (as opposed
-- to their effect of writing a modified tree to disk). The actions can do both
-- read and write -- reads are passed through to the actual filesystem, but the
-- writes are held in memory in a form of modified Tree.
virtualTreeMonad :: (Monad m) => TreeMonad m a -> Tree m -> m (a, Tree m)
virtualTreeMonad action t = runTreeMonad action $ initialState t (\_ -> return ())

virtualTreeIO :: TreeIO a -> Tree IO -> IO (a, Tree IO)
virtualTreeIO = virtualTreeMonad

replaceItem :: (MonadError e m, Monad m) => AnchoredPath -> Maybe (TreeItem m) -> TreeMonad m ()
replaceItem path item = do
  path' <- (`catPaths` path) `fmap` currentDirectory
  modify $ \st -> st { tree = modifyTree (tree st) path' item }

-- | Internal. Mark a given path as changed, so the next sync will flush the
-- modified object to disk.
markChanged :: (Functor m, Monad m) => AnchoredPath -> TreeMonad m ()
markChanged p = do
  x <- get
  size <- lift $ case findFile (tree x) p of
                   Just b -> BL.length `fmap` readBlob b
                   Nothing -> return 0
  put $ x { changed = S.union paths (changed x)
          , changesize = changesize x + size }
    where paths = let (AnchoredPath x) = p
                   in S.fromList $ map AnchoredPath $ inits x

-- | If buffers are becoming large, sync, otherwise do nothing.
maybeFlush :: (Monad m) => TreeMonad m ()
maybeFlush = do x <- gets changesize
                when (x > 100 * 1024 * 1024) $ flush

instance (Monad m, MonadError e m) => TreeRO (TreeMonad m) where
    expandTo p =
        do t <- gets tree
           case find t p of
             Nothing -> do t' <- lift $ expandPath t p `catchError` \_ -> return t
                           modify $ \st -> st { tree = t' }
             _ -> return ()

    fileExists p =
        do expandTo p
           (isJust . (flip findFile p)) `fmap` gets tree

    directoryExists p =
        do expandTo p
           (isJust . (flip findTree p)) `fmap` gets tree

    exists p =
        do expandTo p
           (isJust . (flip find p)) `fmap` gets tree

    readFile p =
        do expandTo p
           t <- gets tree
           let f = findFile t p
           case f of
             Nothing -> fail $ "No such file " ++ show p
             Just x -> lift (readBlob x)

    currentDirectory = ask
    withDirectory dir = local (\old -> old `catPaths` dir)

instance (Functor m, Monad m, MonadError e m) => TreeRW (TreeMonad m) where
    writeFile p con =
        do expandTo p
           replaceItem p (Just blob)
           markChanged p
           maybeFlush
        where blob = File $ Blob (return con) hash
              hash = NoHash -- we would like to say "sha256 con" here, but due
                            -- to strictness of Hash in Blob, this would often
                            -- lead to unnecessary computation which would then
                            -- be discarded anyway; we rely on the sync
                            -- implementation to fix up any NoHash occurrences

    createDirectory p =
        do expandTo p
           replaceItem p $ Just $ SubTree emptyTree

    unlink p =
        do expandTo p
           replaceItem p Nothing

    rename from to =
        do expandTo from
           tr <- gets tree
           let item = find tr from
               found_to = find tr to
           unless (isNothing found_to) $
                  fail $ "Error renaming: destination " ++ show to ++ " exists."
           unless (isNothing item) $ do
                  replaceItem to item
                  replaceItem from Nothing