{-# LANGUAGE BangPatterns #-}

-- | A few darcs-specific utility functions. These are used for reading and
-- writing darcs and darcs-compatible hashed trees.
module Storage.Hashed.Darcs where

import Prelude hiding ( lookup )
import System.FilePath ( (</>) )

import System.Directory( createDirectoryIfMissing, doesFileExist, doesDirectoryExist )
import Codec.Compression.GZip( decompress, compress )
import Control.Applicative( (<$>) )

import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy.Char8 as BL

import Data.List( sortBy )
import Data.Char( chr, ord, isSpace )
import Data.Maybe( fromJust, isNothing )
import qualified Data.Set as S
import Control.Monad.State.Strict

import Storage.Hashed.Tree hiding ( lookup )
import qualified Storage.Hashed.Tree as Tree
import Storage.Hashed.AnchoredPath
import Storage.Hashed.Utils
import Storage.Hashed.Hash
import Storage.Hashed.Packed
import Storage.Hashed.Monad
import Storage.Hashed.Plain

---------------------------------------------------------------------
-- Utilities for coping with the darcs directory format.
--

-- | 'darcsDecodeWhite' interprets the Darcs-specific \"encoded\" filenames
--   produced by 'darcsEncodeWhite'
--
--   > darcsDecodeWhite "hello\32\there" == "hello there"
--   > darcsDecodeWhite "hello\92\there" == "hello\there"
--   > darcsDecodeWhite "hello\there"    == error "malformed filename"
darcsDecodeWhite :: String -> FilePath
darcsDecodeWhite ('\\':cs) =
    case break (=='\\') cs of
    (theord, '\\':rest) ->
        chr (read theord) : darcsDecodeWhite rest
    _ -> error "malformed filename"
darcsDecodeWhite (c:cs) = c: darcsDecodeWhite cs
darcsDecodeWhite "" = ""

-- | 'darcsEncodeWhite' translates whitespace in filenames to a darcs-specific
--   format (backslash followed by numerical representation according to 'ord').
--   Note that backslashes are also escaped since they are used in the encoding.
--
--   > darcsEncodeWhite "hello there" == "hello\32\there"
--   > darcsEncodeWhite "hello\there" == "hello\92\there"
darcsEncodeWhite :: FilePath -> String
darcsEncodeWhite (c:cs) | isSpace c || c == '\\' =
    '\\' : (show $ ord c) ++ "\\" ++ darcsEncodeWhite cs
darcsEncodeWhite (c:cs) = c : darcsEncodeWhite cs
darcsEncodeWhite [] = []

darcsEncodeWhiteBS = BS.pack . darcsEncodeWhite . BS.unpack

decodeDarcsHash bs = case BS.split '-' bs of
                       [s, h] | BS.length s == 10 -> decodeBase16 h
                       _ -> decodeBase16 bs

decodeDarcsSize :: BS.ByteString -> Maybe Int
decodeDarcsSize bs = case BS.split '-' bs of
                       [s, _] | BS.length s == 10 ->
                                  case reads (BS.unpack s) of
                                    [(x, _)] -> Just x
                                    _ -> Nothing
                       _ -> Nothing

darcsLocation :: FilePath -> (Maybe Int, Hash) -> FileSegment
darcsLocation dir (s,h) = (dir </> (prefix s ++ BS.unpack (encodeBase16 h)), Nothing)
    where prefix Nothing = ""
          prefix (Just s) = formatSize s ++ "-"
          formatSize s = let n = show s in replicate (10 - length n) '0' ++ n

----------------------------------------------
-- Darcs directory format.
--

darcsFormatDir :: Tree m -> Maybe BL.ByteString
darcsFormatDir t = BL.fromChunks <$> concat <$>
                       mapM string (sortBy cmp $ listImmediate t)
    where cmp (Name a, _) (Name b, _) = compare a b
          string (Name name, item) =
              do header <- case item of
                             File _ -> Just $ BS.pack "file:\n"
                             SubTree _ -> Just $ BS.pack "directory:\n"
                             Stub _ _ -> Nothing
                 hash <- case itemHash item of
                           NoHash -> Nothing
                           x -> Just $ encodeBase16 x
                 return $ [ header
                          , darcsEncodeWhiteBS name
                          , BS.singleton '\n'
                          , hash, BS.singleton '\n' ]

darcsParseDir :: BL.ByteString -> [(ItemType, Name, Maybe Int, Hash)]
darcsParseDir content = parse (BL.split '\n' content)
    where
      parse (t:n:h':r) = (header t,
                          Name $ BS.pack $ darcsDecodeWhite (BL.unpack n),
                          decodeDarcsSize hash,
                          decodeDarcsHash hash) : parse r
          where hash = BS.concat $ BL.toChunks h'
      parse _ = []
      header x
          | x == BL.pack "file:" = BlobType
          | x == BL.pack "directory:" = TreeType
          | otherwise = error $ "Error parsing darcs hashed dir: " ++ BL.unpack x

----------------------------------------
-- Utilities.
--

-- | Compute a darcs-compatible hash value for a tree-like structure.
darcsTreeHash :: Tree m -> Hash
darcsTreeHash t = case darcsFormatDir t of
                    Nothing -> NoHash
                    Just x -> sha256 x

-- The following two are mostly for experimental use in Packed.

darcsUpdateDirHashes tree = updateSubtrees update tree
    where update t = t { treeHash = darcsTreeHash t }

darcsUpdateHashes tree = updateTree update tree
    where update (SubTree t) = return . SubTree $ t { treeHash = darcsTreeHash t }
          update (File blob@(Blob con _)) =
              do hash <- sha256 <$> readBlob blob
                 return $ File (Blob con hash)

-------------------------------------------
-- Reading darcs pristine data
--

-- | Read and parse a darcs-style hashed directory listing from a given @dir@
-- and with a given @hash@.
readDarcsHashedDir :: FilePath -> (Maybe Int, Hash) -> IO [(ItemType, Name, Maybe Int, Hash)]
readDarcsHashedDir dir h = do
  exist <- doesFileExist $ fst (darcsLocation dir h)
  unless exist $ fail $ "error opening " ++ fst (darcsLocation dir h)
  compressed <- readSegment $ darcsLocation dir h
  let content = decompress compressed
  return $ if BL.null compressed
              then []
              else darcsParseDir content

-- | Read in a darcs-style hashed tree. This is mainly useful for reading
-- \"pristine.hashed\". You need to provide the root hash you are interested in
-- (found in _darcs/hashed_inventory).
readDarcsHashed :: FilePath -> (Maybe Int, Hash) -> IO (Tree IO)
readDarcsHashed dir (_, NoHash) = fail "Cannot readDarcsHashed NoHash"
readDarcsHashed dir root@(_, hash) = do
  items <- readDarcsHashedDir dir root
  subs <- sequence [
           case tp of
             BlobType -> return (d, File $
                                      Blob (readBlob (s, h)) h)
             TreeType ->
                 do let t = readDarcsHashed dir (s, h)
                    return (d, Stub t h)
           | (tp, d, s, h) <- items ]
  return $ makeTreeWithHash subs hash
    where readBlob = fmap decompress . readSegment . darcsLocation dir

----------------------------------------------------
-- Writing darcs-style hashed trees.
--

-- | Write a Tree into a darcs-style hashed directory.
writeDarcsHashed :: Tree IO -> FilePath -> IO Hash
writeDarcsHashed tree dir =
    do t <- darcsUpdateDirHashes <$> expand tree
       sequence_ [ dump =<< readBlob b | (_, File b) <- list t ]
       let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ]
       os' <- mapM dump $ map fromJust dirs
       return $ darcsTreeHash t
    where dump bits = BL.writeFile (dir </> BS.unpack (encodeBase16 $ sha256 bits)) (compress bits)


-- | Create a hashed file from a 'FilePath' and content. In case the file exists
-- it is kept untouched and is assumed to have the right content. XXX Corrupt
-- files should be probably renamed out of the way automatically or something
-- (probably when they are being read though).
fsCreateHashedFile :: FilePath -> BL.ByteString -> TreeIO ()
fsCreateHashedFile fn content =
    liftIO $ do
      exist <- doesFileExist fn
      unless exist $ BL.writeFile fn content

-- | Run a 'TreeIO' @action@ in a hashed setting. The @initial@ tree is assumed
-- to be fully available from the @directory@, and any changes will be written
-- out to same. Please note that actual filesystem files are never removed.
--
-- XXX This somehow manages to leak memory, in some usege scenarios (apparently
-- not even all). The only reproducer known so far is \"gorsvet pull\".
hashedTreeIO :: TreeIO a -- ^ action
             -> Tree IO -- ^ initial
             -> FilePath -- ^ directory
             -> IO (a, Tree IO)
hashedTreeIO action t dir =
    do runTreeMonad action $ initialState t syncHashed
    where syncHashed ch = do
            modify $ \st -> st { tree = darcsUpdateDirHashes $ tree st }
            forM_ (reverse $ S.toList ch) $ \c -> do
                let path = anchorPath "" c
                current <- gets tree
                case find current c of
                  Just (File b) -> updateFile c b
                  Just (SubTree s) -> updateSub c s
                  _ -> return () -- the file could have disappeared in the meantime
          updateFile path b@(Blob _ !h) = do
            content <- liftIO $ readBlob b
            let h' = case h of
                       NoHash -> sha256 content
                       _ -> h
                fn = dir </> BS.unpack (encodeBase16 h)
                nblob = File $ Blob (decompress `fmap` BL.readFile fn) h
                newcontent = compress content
            fsCreateHashedFile fn newcontent
            replaceItem path (Just nblob)
          updateSub path s = do
            let !hash = darcsTreeHash s
                Just dirdata = darcsFormatDir s
                fn = dir </> BS.unpack (encodeBase16 hash)
                ns = SubTree (s { treeHash = hash })
            fsCreateHashedFile fn (compress dirdata)
            replaceItem path (Just ns)

--------------------------------------------------------------
-- Reading and writing packed pristine. EXPERIMENTAL.
----

-- | Read a Tree in the darcs hashed format from an object storage. This is
-- basically the same as readDarcsHashed from Storage.Hashed, but uses an
-- object storage instead of traditional darcs filesystem layout. Requires the
-- tree root hash as a starting point.
readPackedDarcsPristine :: OS -> Hash -> IO (Tree IO)
readPackedDarcsPristine os root =
    do items <- darcsParseDir <$> grab root
       subs <- sequence [
                case tp of
                  BlobType -> return (d, File $ file h)
                  TreeType -> let t = readPackedDarcsPristine os h
                               in return (d, Stub t h)
                | (tp, d, _, h) <- items ]
       return $ makeTreeWithHash subs root
    where file h = Blob (grab h) h
          grab hash = do maybeseg <- lookup os hash
                         case maybeseg of
                           Nothing -> fail $ "hash " ++ BS.unpack (encodeBase16 hash) ++ " not available"
                           Just seg -> readSegment seg

-- | Write a Tree into an object storage, using the darcs-style directory
-- formatting (and therefore darcs-style hashes). Gives back the object storage
-- and the root hash of the stored Tree. NB. The function expects that the Tree
-- comes equipped with darcs-style hashes already!
writePackedDarcsPristine :: Tree IO -> OS -> IO (OS, Hash)
writePackedDarcsPristine tree os =
    do t <- darcsUpdateDirHashes <$> expand tree
       files <- sequence [ readBlob b | (_, File b) <- list t ]
       let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ]
       os' <- hatch os $ files ++ (map fromJust dirs)
       return (os', darcsTreeHash t)

storePackedDarcsPristine :: Tree IO -> OS -> IO (OS, Hash)
storePackedDarcsPristine tree os =
    do (os', root) <- writePackedDarcsPristine tree os
       return $ (os' { roots = root : roots os'
                     -- FIXME we probably don't want to override the references
                     -- thing completely here...
                     , references = darcsPristineRefs }, root)

darcsPristineRefs :: FileSegment -> IO [Hash]
darcsPristineRefs fs = do
  con <- (darcsParseDir <$> readSegment fs) `catch` \_ -> return []
  return $! [ hash | (_, _, _, hash) <- con, valid hash ]
    where valid NoHash = False
          valid _ = True