{-# LANGUAGE ScopedTypeVariables, BangPatterns #-}

-- | An experimental monadic interface to Tree mutation. The main idea is to
-- simulate IO-ish manipulation of real filesystem (that's the state part of
-- the monad), and to keep memory usage down by reasonably often dumping the
-- intermediate data to disk and forgetting it. XXX This currently does not
-- work as advertised and the monads leak memory. So far, I'm at a loss why
-- this happens.
module Storage.Hashed.Monad
    ( hashedTreeIO, plainTreeIO, virtualTreeIO
    , readFile, writeFile, createDirectory, rename, unlink
    , fileExists, directoryExists, exists
    , tree, cwd, TreeState, TreeIO
    ) where

import Prelude hiding ( read, catch, readFile, writeFile )

import Storage.Hashed.AnchoredPath
import Storage.Hashed.Tree
import Storage.Hashed.Utils
import Storage.Hashed.Darcs

import Control.Exception.Extensible( catch, SomeException(..) )

import System.Directory( createDirectoryIfMissing, doesFileExist )
import System.FilePath( (</>) )
import Data.List( inits )
import Data.Int( Int64 )
import Data.Maybe( isNothing, isJust )

import Codec.Compression.GZip( decompress, compress )

import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.ByteString.Char8 as BS
import Control.Monad.State.Strict
import qualified Data.Set as S

-- | Internal state of the 'TreeIO' monad. Keeps track of the current Tree
-- content, unsync'd changes and a current working directory (of the monad).
data TreeState = TreeState { cwd :: AnchoredPath
                           , tree :: Tree
                           , changed :: S.Set AnchoredPath
                           , changesize :: Int64
                           , sync :: TreeIO () }

-- | A 'TreeIO' monad. A sort of like IO but it keeps a 'TreeState' around as well,
-- which is a sort of virtual filesystem. Depending on how you obtained your
-- 'TreeIO', the actions in your virtual filesystem get somehow reflected in the
-- actual real filesystem. For 'virtualTreeIO', nothing happens in real
-- filesystem, however with 'plainTreeIO', the plain tree will be updated every
-- now and then, and with 'hashedTreeIO' a darcs-style hashed tree will get
-- updated.
type TreeIO = StateT TreeState IO

initialState :: Tree -> TreeIO () -> TreeState
initialState t s = TreeState { cwd = AnchoredPath []
                             , tree = t
                             , changed = S.empty
                             , changesize = 0
                             , sync = s }

runTreeIO :: TreeIO a -> TreeState -> IO (a, Tree)
runTreeIO action initial = do
  (out, final) <- runStateT (do x <- action
                                get >>= sync
                                return x) initial
  return (out, tree final)

-- | Run a TreeIO action without dumping anything to disk. Useful for running
-- tree mutations just for the purpose of getting the resulting Tree and
-- throwing it away.
virtualTreeIO :: TreeIO a -> Tree -> IO (a, Tree)
virtualTreeIO action t = runTreeIO action $ initialState t (return ())

-- | Create a hashed file from a 'FilePath' and content. In case the file exists
-- it is kept untouched and is assumed to have the right content. XXX Corrupt
-- files should be probably renamed out of the way automatically or something
-- (probably when they are being read though).
fsCreateHashedFile :: FilePath -> BL.ByteString -> TreeIO ()
fsCreateHashedFile fn content =
    liftIO $ do
      exist <- doesFileExist fn
      unless exist $ BL.writeFile fn content

replaceItemAbs :: AnchoredPath -> Maybe TreeItem -> TreeIO ()
replaceItemAbs path item =
    modify $ \st -> st { tree = modifyTree (tree st) path item }

replaceItem :: AnchoredPath -> Maybe TreeItem -> TreeIO ()
replaceItem path item =
    modify $ \st -> st { tree = modifyTree (tree st)
                                           (cwd st `catPaths` path) item }

expandTo :: AnchoredPath -> TreeIO ()
expandTo p = do t <- gets tree
                t' <- liftIO $ expandPath t p `catch` \(_::SomeException) -> return t
                modify $ \st -> st { tree = t' }

-- | Run a 'TreeIO' @action@ in a hashed setting. The @initial@ tree is assumed
-- to be fully available from the @directory@, and any changes will be written
-- out to same. Please note that actual filesystem files are never removed.
--
-- XXX This somehow manages to leak memory, somewhere.
hashedTreeIO :: TreeIO a -- ^ action
             -> Tree -- ^ initial
             -> FilePath -- ^ directory
             -> IO (a, Tree)
hashedTreeIO action t dir =
    do runTreeIO action $ initialState t syncHashed
    where syncHashed = do
            ch <- gets changed
            modify $ \st -> st { changed = S.empty }
            forM_ (reverse $ S.toList ch) $ \c -> do
                let path = anchorPath "" c
                current <- gets tree
                case find current c of
                  Just (File b) -> updateFile c b
                  Just (SubTree s) -> updateSub c s
                  _ -> fail $ "Bar at " ++ path
          updateFile path b@(Blob _ (Just !h)) = do
            let fn = dir </> BS.unpack (darcsFormatHash h)
                nblob = File $ Blob (decompress `fmap` BL.readFile fn) (Just h)
            newcontent <- liftIO $ compress `fmap` read b
            fsCreateHashedFile fn newcontent
            replaceItemAbs path (Just nblob)
          updateFile path b@(Blob _ Nothing) = do
            content <- liftIO $ read b
            let h = hashSetSize (sha256 content) (BL.length content)
                fn = dir </> BS.unpack (darcsFormatHash h)
                nblob = File $ Blob (decompress `fmap` BL.readFile fn) (Just h)
                newcontent = compress content
            fsCreateHashedFile fn newcontent
            replaceItemAbs path (Just nblob)
          updateSub path s = do
            let !hash = darcsTreeHash s
                dirdata = darcsFormatDir s
                fn = dir </> BS.unpack (darcsFormatHash $ hash)
                ns = SubTree (s { treeHash = Just hash })
            fsCreateHashedFile fn (compress dirdata)
            replaceItemAbs path (Just ns)

-- | Run a 'TreeIO' action in a plain tree setting. Writes out changes to the
-- plain tree every now and then (after the action is finished, the last tree
-- state is always flushed to disk). XXX Modify the tree with filesystem
-- reading and put it back into st (ie. replace the in-memory Blobs with normal
-- ones, so the memory can be GCd).
plainTreeIO :: TreeIO a -> Tree -> FilePath -> IO (a, Tree)
plainTreeIO action t dir = runTreeIO action $ initialState t syncPlain
    where syncPlain = do
            ch <- gets changed
            modify $ \st -> st { changed = S.empty }
            current  <- gets tree
            forM_ (S.toList ch) $ \c -> do
                let path = anchorPath dir c
                case find current c of
                  Just (File b) -> do
                    liftIO $ read b >>= BL.writeFile path
                    let nblob = File $ Blob (BL.readFile path) Nothing
                    modify $ \st -> st { tree = modifyTree (tree st) c
                                                           (Just nblob) }
                  Just (SubTree _) ->
                      liftIO $ createDirectoryIfMissing False path
                  _ -> fail $ "Foo at " ++ path

-- | Check for existence of a file.
fileExists :: AnchoredPath -> TreeIO Bool
fileExists p = do expandTo p
                  (isJust . (flip findFile p)) `fmap` gets tree

-- | Check for existence of a directory.
directoryExists :: AnchoredPath -> TreeIO Bool
directoryExists p = do expandTo p
                       (isJust . (flip findTree p)) `fmap` gets tree

-- | Check for existence of a node (file or directory, doesn't matter).
exists :: AnchoredPath -> TreeIO Bool
exists p = do expandTo p
              (isJust . (flip find p)) `fmap` gets tree

-- | Grab content of a file in the current Tree at the given path.
readFile :: AnchoredPath -> TreeIO BL.ByteString
readFile p = do expandTo p
                t <- gets tree
                let f = findFile t p
                case f of
                  Nothing -> fail $ "No such file " ++ show p
                  Just x -> liftIO (read x)

-- | Internal. Mark a given path as changed, so the next sync will flush the
-- modified object to disk.
markChanged :: AnchoredPath -> TreeIO ()
markChanged p = do
  x <- get
  size <- liftIO $ case findFile (tree x) p of
                     Just b -> BL.length `fmap` read b
                     Nothing -> return 0
  put $ x { changed = S.union paths (changed x)
          , changesize = changesize x + size }
    where paths = let (AnchoredPath x) = p
                   in S.fromList $ map AnchoredPath $ inits x

-- | Change content of a file at a given path. The change will be eventually
-- flushed to disk, but might be buffered for some time.
writeFile :: AnchoredPath -> BL.ByteString -> TreeIO ()
writeFile p con =
    do expandTo p
       replaceItem p (Just blob)
       markChanged p
       maybeSync
    where blob = File $ Blob (return con) hash
          hash = Just $ hashSetSize (sha256 con) (BL.length con)

createDirectory :: AnchoredPath -> TreeIO ()
createDirectory p = do expandTo p
                       replaceItem p $ Just $ SubTree emptyTree

unlink :: AnchoredPath -> TreeIO ()
unlink p = do expandTo p
              replaceItem p Nothing

rename :: AnchoredPath -> AnchoredPath -> TreeIO ()
rename from to = do expandTo from
                    tr <- gets tree
                    let item = find tr from
                    unless (isNothing item) $ do
                      replaceItem to item
                      replaceItem from Nothing

-- | If buffers are becoming large, sync, otherwise do nothing.
maybeSync :: TreeIO ()
maybeSync = do x <- gets changesize
               when (x > 16 * 1024 * 1024) $ get >>= sync