--  Copyright (C) 2009-2011 Petr Rockai
--
--  BSD3
{-# LANGUAGE ScopedTypeVariables, BangPatterns #-}

-- | A few darcs-specific utility functions. These are used for reading and
-- writing darcs and darcs-compatible hashed trees.
module Darcs.Util.Tree.Hashed
    ( -- * Obtaining Trees.
    --
    -- | Please note that Trees obtained this way will contain Stub
    -- items. These need to be executed (they are IO actions) in order to be
    -- accessed. Use 'expand' to do this. However, many operations are
    -- perfectly fine to be used on a stubbed Tree (and it is often more
    -- efficient to do everything that can be done before expanding a Tree).
      readDarcsHashed
    -- * Writing trees.
    , writeDarcsHashed
    -- * Interact with hashed tree
    , hashedTreeIO
    -- * Other
    , readDarcsHashedDir
    , readDarcsHashedNosize
    , darcsAddMissingHashes
    , darcsLocation
    , darcsTreeHash
    , decodeDarcsHash
    , decodeDarcsSize
    , darcsUpdateHashes
    ) where

import Prelude hiding ( lookup, (<$>) )
import System.FilePath ( (</>) )

import System.Directory( doesFileExist )
import Codec.Compression.GZip( decompress, compress )
import Control.Applicative( (<$>) )

import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy.Char8 as BLC
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString as B

import Data.List( sortBy )
import Data.Maybe( fromJust, isJust )
import Control.Monad.State.Strict

import Darcs.Util.Path
import Darcs.Util.ByteString ( FileSegment, readSegment )
import Darcs.Util.Hash
import Darcs.Util.Progress ( debugMessage )
import Darcs.Util.Tree
import Darcs.Util.Tree.Monad

---------------------------------------------------------------------
-- Utilities for coping with the darcs directory format.
--

decodeDarcsHash :: BC.ByteString -> Hash
decodeDarcsHash bs = case BC.split '-' bs of
                       [s, h] | BC.length s == 10 -> decodeBase16 h
                       _ -> decodeBase16 bs

decodeDarcsSize :: BC.ByteString -> Maybe Int
decodeDarcsSize bs = case BC.split '-' bs of
                       [s, _] | BC.length s == 10 ->
                                  case reads (BC.unpack s) of
                                    [(x, _)] -> Just x
                                    _ -> Nothing
                       _ -> Nothing

darcsLocation :: FilePath -> (Maybe Int, Hash) -> FileSegment
darcsLocation dir (s,h) = case hash of
                            "" -> error "darcsLocation: invalid hash"
                            _ -> (dir </> prefix s ++ hash, Nothing)
    where prefix Nothing = ""
          prefix (Just s') = formatSize s' ++ "-"
          formatSize s' = let n = show s' in replicate (10 - length n) '0' ++ n
          hash = BC.unpack (encodeBase16 h)

----------------------------------------------
-- Darcs directory format.
--

darcsFormatDir :: Tree m -> Maybe BLC.ByteString
darcsFormatDir t = BLC.fromChunks . concat <$>
                       mapM string (sortBy cmp $ listImmediate t)
    where cmp (a, _) (b, _) = compare a b
          string (name, item) =
              do header <- case item of
                             File _ -> Just $ BC.pack "file:\n"
                             _ -> Just $ BC.pack "directory:\n"
                 hash <- case itemHash item of
                           NoHash -> Nothing
                           x -> Just $ encodeBase16 x
                 return   [ header
                          , encodeWhiteName name
                          , BC.singleton '\n'
                          , hash, BC.singleton '\n' ]

darcsParseDir :: BLC.ByteString -> [(ItemType, Name, Maybe Int, Hash)]
darcsParseDir content = parse (BLC.split '\n' content)
    where
      parse (t:n:h':r) = (header t,
                          decodeWhiteName $ B.concat $ BL.toChunks n,
                          decodeDarcsSize hash,
                          decodeDarcsHash hash) : parse r
          where hash = BC.concat $ BLC.toChunks h'
      parse _ = []
      header x
          | x == BLC.pack "file:" = BlobType
          | x == BLC.pack "directory:" = TreeType
          | otherwise = error $ "Error parsing darcs hashed dir: " ++ BLC.unpack x

----------------------------------------
-- Utilities.
--

-- | Compute a darcs-compatible hash value for a tree-like structure.
darcsTreeHash :: Tree m -> Hash
darcsTreeHash t = case darcsFormatDir t of
                    Nothing -> NoHash
                    Just x -> sha256 x

-- The following two are mostly for experimental use in Packed.

darcsUpdateDirHashes :: Tree m -> Tree m
darcsUpdateDirHashes = updateSubtrees update
    where update t = t { treeHash = darcsTreeHash t }

darcsUpdateHashes :: (Monad m) => Tree m -> m (Tree m)
darcsUpdateHashes = updateTree update
    where update (SubTree t) = return . SubTree $ t { treeHash = darcsTreeHash t }
          update (File blob@(Blob con _)) =
              do hash <- sha256 <$> readBlob blob
                 return $ File (Blob con hash)
          update stub = return stub

darcsHash :: (Monad m) => TreeItem m -> m Hash
darcsHash (SubTree t) = return $ darcsTreeHash t
darcsHash (File blob) = sha256 <$> readBlob blob
darcsHash _ = return NoHash

darcsAddMissingHashes :: (Monad m) => Tree m -> m (Tree m)
darcsAddMissingHashes = addMissingHashes darcsHash

-------------------------------------------
-- Reading darcs pristine data
--

-- | Read and parse a darcs-style hashed directory listing from a given @dir@
-- and with a given @hash@.
readDarcsHashedDir :: FilePath -> (Maybe Int, Hash) -> IO [(ItemType, Name, Maybe Int, Hash)]
readDarcsHashedDir dir h = do
  debugMessage $ "readDarcsHashedDir: " ++ dir ++ " " ++ BC.unpack (encodeBase16 (snd h))
  exist <- doesFileExist $ fst (darcsLocation dir h)
  unless exist $ fail $ "error opening " ++ fst (darcsLocation dir h)
  compressed <- readSegment $ darcsLocation dir h
  let content = decompress compressed
  return $ if BLC.null compressed
              then []
              else darcsParseDir content

-- | Read in a darcs-style hashed tree. This is mainly useful for reading
-- \"pristine.hashed\". You need to provide the root hash you are interested in
-- (found in _darcs/hashed_inventory).
readDarcsHashed' :: Bool -> FilePath -> (Maybe Int, Hash) -> IO (Tree IO)
readDarcsHashed' _ _ (_, NoHash) = fail "Cannot readDarcsHashed NoHash"
readDarcsHashed' sizefail dir root@(_, hash) = do
  items' <- readDarcsHashedDir dir root
  subs <- sequence [
           do when (sizefail && isJust s) $
                fail ("Unexpectedly encountered size-prefixed hash in " ++ dir)
              case tp of
                BlobType -> return (d, File $
                                       Blob (readBlob' (s, h)) h)
                TreeType ->
                  do let t = readDarcsHashed dir (s, h)
                     return (d, Stub t h)
           | (tp, d, s, h) <- items' ]
  return $ makeTreeWithHash subs hash
    where readBlob' = fmap decompress . readSegment . darcsLocation dir

readDarcsHashed :: FilePath -> (Maybe Int, Hash) -> IO (Tree IO)
readDarcsHashed = readDarcsHashed' False

readDarcsHashedNosize :: FilePath -> Hash -> IO (Tree IO)
readDarcsHashedNosize dir hash = readDarcsHashed' True dir (Nothing, hash)

----------------------------------------------------
-- Writing darcs-style hashed trees.
--

-- | Write a Tree into a darcs-style hashed directory.
writeDarcsHashed :: Tree IO -> FilePath -> IO Hash
writeDarcsHashed tree' dir =
    do t <- darcsUpdateDirHashes <$> expand tree'
       sequence_ [ dump =<< readBlob b | (_, File b) <- list t ]
       let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ]
       _ <- mapM (dump . fromJust) dirs
       return $ darcsTreeHash t
    where dump bits =
              do let name = dir </> BC.unpack (encodeBase16 $ sha256 bits)
                 exist <- doesFileExist name
                 unless exist $ BL.writeFile name (compress bits)

-- | Create a hashed file from a 'FilePath' and content. In case the file exists
-- it is kept untouched and is assumed to have the right content. XXX Corrupt
-- files should be probably renamed out of the way automatically or something
-- (probably when they are being read though).
fsCreateHashedFile :: FilePath -> BLC.ByteString -> TreeIO ()
fsCreateHashedFile fn content =
    liftIO $ do
      exist <- doesFileExist fn
      unless exist $ BL.writeFile fn content

-- | Run a 'TreeIO' @action@ in a hashed setting. The @initial@ tree is assumed
-- to be fully available from the @directory@, and any changes will be written
-- out to same. Please note that actual filesystem files are never removed.
hashedTreeIO :: TreeIO a -- ^ action
             -> Tree IO -- ^ initial
             -> FilePath -- ^ directory
             -> IO (a, Tree IO)
hashedTreeIO action t dir =
    runTreeMonad action $ initialState t darcsHash updateItem
    where updateItem _ (File b) = File <$> updateFile b
          updateItem _ (SubTree s) = SubTree <$> updateSub s
          updateItem _ x = return x

          updateFile b@(Blob _ !h) = do
            content <- liftIO $ readBlob b
            let fn = dir </> BC.unpack (encodeBase16 h)
                nblob = Blob (decompress <$> rblob) h
                rblob = BL.fromChunks . return <$> B.readFile fn
                newcontent = compress content
            fsCreateHashedFile fn newcontent
            return nblob
          updateSub s = do
            let !hash = treeHash s
                Just dirdata = darcsFormatDir s
                fn = dir </> BC.unpack (encodeBase16 hash)
            fsCreateHashedFile fn (compress dirdata)
            return s