{-# LANGUAGE ParallelListComp #-}
module Storage.Hashed.Packed where

import Prelude hiding ( lookup, read )
import Storage.Hashed.AnchoredPath
import Storage.Hashed.Tree hiding ( lookup )
import Storage.Hashed.Utils
import Storage.Hashed.Darcs

import Control.Monad( forM, forM_, unless )
import Control.Applicative( (<$>) )
import System.FilePath( (</>), (<.>) )
import System.Directory( createDirectoryIfMissing, removeFile
                       , getDirectoryContents )

import Bundled.Posix( fileExists, isDirectory, getFileStatus )

import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Char8 as BS
import Data.Maybe( listToMaybe, catMaybes, fromJust, isNothing )

import qualified Data.Set as S
import qualified Data.Map as M
import Data.List( sort )

-- | On-disk format for object storage: we implement a completely loose format
-- (one file per object), a compact format stored in a single append-only file
-- and an immutable "pack" format.
data Format = Loose | Compact | Pack deriving (Show, Eq)

is_loose os = format (hatchery os) == Loose
is_compact os = format (hatchery os) == Compact

loose_dirs = let chars = ['0'..'9'] ++ ['a'..'f']
              in [ [a,b] | a <- chars, b <- chars ]

loosePath :: OS -> Hash -> FilePath
loosePath os (Hash (_,hash)) =
    let hash' = BS.unpack hash
     in rootdir os </> "hatchery" </> take 2 hash' </> drop 2 hash'

looseLookup :: OS -> Hash -> IO (Maybe FileSegment)
looseLookup os hash = do
  let path = loosePath os hash
  exist <- fileExists <$> getFileStatus path
  return $ if exist then Just (path, Nothing)
                    else Nothing

-- | Object storage block. When used as a hatchery, the loose or compact format
-- are preferable, while for mature space, the pack format is more useful.
data Block = Block { blockLookup :: Hash -> IO (Maybe FileSegment)
                   , size :: Int
                   , format :: Format }

-- | Object storage. Contains a single "hatchery" and possibly a number of
-- mature space blocks, usually in form of packs. It also keeps a list of root
-- pointers and has a way to extract pointers from objects (externally
-- supplied). These last two things are used to implement a simple GC.
data OS = OS { hatchery :: Block
             , mature :: [Block]
             , roots :: [Hash]
             , references :: FileSegment -> IO [Hash]
             , rootdir :: FilePath }

-- | Reduce number of packs in the object storage. This may both recombine
-- packs to eliminate dead objects and join some packs to form bigger
-- packs. The set of hashes given is used as roots for GC marking.
repack :: OS -> S.Set Hash -> IO OS
repack os roots = error "repack undefined"

-- | Add new objects to the object storage (i.e. put them into hatchery). It is
-- safe to call this even on objects that are already present in the storage:
-- such objects will be skipped.
hatch :: OS -> [BL.ByteString] -> IO OS
hatch os blobs =
    do processed <- mapM sieve blobs
       write [ (h, b) | (True, h, b) <- processed ]
    where write bits
              | is_loose os =
                  do forM bits $ \(hash, blob) -> do
                       BL.writeFile (loosePath os hash) blob
                     return os
              | is_compact os = error "hatch/compact undefined"
              | otherwise = fail "Hatchery must be either Loose or Compact."
          sieve blob = do let hash = sha256 blob
                          absent <- isNothing <$> lookup os hash
                          return (absent, hash, blob)

-- | Reduce hatchery size by moving things into packs.
compact :: OS -> IO OS
compact os = do objects <- live os [hatchery os]
                block <- createPack os (M.toList objects)
                cleanup
                return $ os { mature = block:mature os }
    where cleanup | is_loose os =
                      forM_ loose_dirs $ nuke . ((rootdir os </> "hatchery") </>)
                  | is_compact os =
                      removeFile (rootdir os </> "hatchery") >> return ()
          nuke dir = mapM (removeFile . (dir </>)) =<<
                       (Prelude.filter (`notElem` [".", ".."]) `fmap`
                               getDirectoryContents dir)

blocksLookup :: [Block] -> Hash -> IO (Maybe (Hash, FileSegment))
blocksLookup blocks hash =
    do segment <- cat `fmap` mapM (flip blockLookup hash) blocks
       return $ case segment of
                  Nothing -> Nothing
                  Just seg -> Just (hash, seg)
    where cat = listToMaybe . catMaybes

lookup :: OS -> Hash -> IO (Maybe FileSegment)
lookup os hash =
    do res <- blocksLookup (hatchery os : mature os) hash
       return $ case res of
                  Nothing -> Nothing
                  Just (_, seg) -> Just seg

-- | Create an empty object storage in given directory, with a hatchery of
-- given format. The directory is created if needed, but is assumed to be
-- empty.
create :: FilePath -> Format -> IO OS
create path fmt = do createDirectoryIfMissing True path
                     initHatchery
                     readOS path
    where initHatchery | fmt == Loose =
                           do mkdir hatchpath
                              forM loose_dirs $ mkdir . (hatchpath </>)
                       | fmt == Compact =
                           error "create/mkHatchery Compact undefined"
          mkdir = createDirectoryIfMissing False
          hatchpath = path </> "hatchery"

readOS :: FilePath -> IO OS
readOS path =
    do hatch_stat <- getFileStatus $ path </> "hatchery"
       let is_os = fileExists hatch_stat
           is_loose = isDirectory hatch_stat
       unless is_os $ fail $ path ++ " is not an object storage!"
       let _hatchery = Block { blockLookup = look os
                             , format = if is_loose then Loose else Compact }
           os = OS { hatchery = _hatchery
                   , rootdir = path
                   , mature = packs
                   , roots = _roots }
           look | format _hatchery == Loose = looseLookup
                | otherwise = undefined
           packs = [] -- FIXME read packs
           _roots = [] -- FIXME read packs
       return os

readPack = undefined

createPack :: OS -> [(Hash, FileSegment)] -> IO Block
createPack os bits =
    do contents <- mapM readSegment (map snd bits)
       let offsets = scanl (+) 0 $ map BL.length contents
           headerbits = [ BL.fromChunks [ hash
                                        , BS.pack ": ("
                                        , BS.pack (show $ offset)
                                        , BS.pack ", "
                                        , BS.pack (show $ BL.length string)
                                        , BS.pack ")\n" ]
                          | (Hash (_, hash), _) <- bits | string <- contents | offset <- offsets ]
           header = BL.concat $ sort headerbits
           blob = BL.concat $ header:contents
           hash@(Hash (_, hashhash)) = sha256 blob
           path = rootdir os </> BS.unpack hashhash <.> "bin"
       BL.writeFile path blob
       return $ readPack path

-- | Build a map of live objects (i.e. those reachable from the given roots) in
-- a given list of Blocks.
live :: OS -> [Block] -> IO (M.Map Hash FileSegment)
live os blocks =
    reachable (references os)
              (blocksLookup blocks)
              (S.fromList $ roots os)

--------------------------------------------------------------
-- Reading and writing darcs-style pristine
----

-- | Read a Tree in the darcs hashed format from an object storage. This is
-- basically the same as readDarcsHashed from Storage.Hashed, but uses an
-- object storage instead of traditional darcs filesystem layout. Requires the
-- tree root hash as a starting point.
readPackedDarcsPristine :: OS -> Hash -> IO Tree
readPackedDarcsPristine os root =
    do seg <- fromJust <$> lookup os root
       items <- darcsParseDir <$> readSegment seg
       subs <- sequence [
                case tp of
                  BlobType -> return (d, File $ file h)
                  TreeType -> let t = readPackedDarcsPristine os h
                               in return (d, Stub t $ Just h)
                | (tp, d, h) <- items ]
       return $ makeTreeWithHash subs root
    where file h = Blob ((fromJust <$> lookup os h) >>= readSegment) (Just h)

-- | Write a Tree into an object storage, using the darcs-style directory
-- formatting (and therefore darcs-style hashes). Gives back the object storage
-- and the root hash of the stored Tree. NB. The function expects that the Tree
-- comes equipped with darcs-style hashes already!
writePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash)
writePackedDarcsPristine tree os =
    do t <- expand tree
       files <- sequence [ read b | (_, File b) <- list t ]
       let dirs = darcsFormatDir t : [ darcsFormatDir d | (_, SubTree d) <- list t ]
       os' <- hatch os $ files ++ dirs
       return (os', darcsTreeHash t)

storePackedDarcsPristine :: Tree -> OS -> IO (OS, Hash)
storePackedDarcsPristine tree os =
    do (os', root) <- writePackedDarcsPristine tree os
       return $ (os' { roots = root : roots os'
                     -- FIXME we probably don't want to override the references
                     -- thing completely here...
                     , references = darcsPristineRefs }, root)

darcsPristineRefs :: FileSegment -> IO [Hash]
darcsPristineRefs fs = do
  con <- (darcsParseDir <$> readSegment fs) `catch` \_ -> return []
  return $! [ hash | (_, _, hash) <- con, valid hash ]
    where valid (Hash (_, hash)) =
              BS.length hash == 64 && all (`elem` "0123456789abcdef") (BS.unpack hash)