{-# LANGUAGE GeneralizedNewtypeDeriving, TemplateHaskell, DeriveDataTypeable #-}
module Database.KeyValueHash
  ( Key, Value
  , Size, mkSize, sizeLinear
  , HashFunction, stdHash, mkHashFunc
  , Database, createDatabase, openDatabase, closeDatabase
  , withCreateDatabase, withOpenDatabase
  , readKey, writeKey, deleteKey
  ) where

import Control.Applicative ((<$>), (<*>))
import Control.Monad (when)
import Data.Binary (Binary(..))
import Data.Binary.Get (runGet)
import Data.Binary.Put (runPut)
import Data.Derive.Binary(makeBinary)
import Data.DeriveTH(derive)
import Data.Hashable (Hashable, hash)
import Data.Monoid (mconcat)
import Data.Typeable (Typeable)
import Data.Word (Word8, Word32, Word64)
import System.FilePath ((</>))
import qualified Control.Exception as Exc
import qualified Data.ByteString as SBS
import qualified Data.ByteString.Lazy as LBS
import qualified System.Directory as Directory
import qualified System.IO as IO

-- of any length, but long values may be less efficiently handled
type Key = SBS.ByteString
type Value = SBS.ByteString

newtype Size = Size {
  sizeLog :: Word8
  }

sizeLinear :: Size -> Word64
sizeLinear = (2^) . sizeLog

instance Show Size where
  show (Size x) = "sz" ++ show x

mkSize :: Word64 -> Size
mkSize = Size . (ceiling :: Double -> Word8) . logBase 2 . fromIntegral

data HashFunction = HashFunction
  { hfHash :: Key -> Size -> Word64
  , hfName :: String
  }

-- Must not use 0-value because it is used as an invalid value ptr
cap :: Word64 -> Size -> Word64
cap val size = val `mod` (sizeLinear size)

mkHashFunc :: String -> (Key -> Word64) -> HashFunction
mkHashFunc name f = HashFunction
  { hfName = name
  , hfHash = cap . f
  }

stdHash :: HashFunction
stdHash = mkHashFunc "Hashable" (fromIntegral . hash)

data Database = Database
  { _dbPath :: FilePath -- directory container
  , dbSize :: Size
  , dbHashFunc :: HashFunction
  , dbKeysHandle :: IO.Handle
  , dbValuesHandle :: IO.Handle
  }

data FileRange = FileRange
  { _frOffset :: Word64
  , _frSize :: Word64
  }
derive makeBinary ''FileRange

type ValuePtr = Word64 -- offset in values file
type KeyPtr = Word64 -- index in key file

type KeyRecord = ValuePtr

data ValueHeader = ValueHeader
  { vhNextCollision :: ValuePtr
    -- if value is re-written multiple times, we can do it in-place as long as it fits
  , vhAllocSize :: Word32
  , vhKeySize :: Word32
  , vhValueSize :: Word32
  }
derive makeBinary ''ValueHeader

atVhNextCollision :: (ValuePtr -> ValuePtr) -> ValueHeader -> ValueHeader
atVhNextCollision f v = v { vhNextCollision = f (vhNextCollision v) }

encode :: Binary a => a -> SBS.ByteString
encode = strictify . runPut . put

decode :: Binary a => SBS.ByteString -> a
decode = runGet get . lazify

binaryPutSize :: Binary a => a -> Word64
binaryPutSize = fromIntegral . SBS.length . encode

-- Make a fake KeyRecord because Binary doesn't have a calcSize
-- operation (TODO: Use my own combinators for fixed-size records)
keyRecordSize :: Word64
keyRecordSize = binaryPutSize (0 :: KeyRecord)

valueHeaderSize :: Word64
valueHeaderSize = binaryPutSize $ ValueHeader 0 0 0 0

makeFileName :: String -> FilePath -> HashFunction -> Size -> FilePath
makeFileName prefix path func size =
  path </> concat [prefix, hfName func, "_", show size]

keysFileName :: FilePath -> HashFunction -> Size -> FilePath
keysFileName = makeFileName "keys"

valuesFileName :: FilePath -> HashFunction -> Size -> FilePath
valuesFileName = makeFileName "values"

data FileAlreadyExists = FileAlreadyExists String deriving (Show, Typeable)
instance Exc.Exception FileAlreadyExists
assertNotExists :: FilePath -> IO ()
assertNotExists fileName = do
  de <- Directory.doesDirectoryExist fileName
  fe <- Directory.doesFileExist fileName
  when (de || fe) $ Exc.throwIO (FileAlreadyExists fileName)

createDatabase :: FilePath -> HashFunction -> Size -> IO Database
createDatabase path func size = do
  Directory.createDirectory path
  keysHandle <- mkHandle keysFileName
  IO.hSetFileSize keysHandle . fromIntegral $ sizeLinear size * keyRecordSize
  valuesHandle <- mkHandle valuesFileName
  -- Offset 0 in the values file is used as invalid, so just write a useless 16-byte
  SBS.hPut valuesHandle $ SBS.replicate 16 0
  return $ Database path size func keysHandle valuesHandle
  where
    mkHandle f = do
      let fileName = f path func size
      assertNotExists fileName
      IO.openFile fileName IO.ReadWriteMode

openDatabase :: FilePath -> HashFunction -> Size -> IO Database
openDatabase path func size =
  Database path size func <$> mkHandle keysFileName <*> mkHandle valuesFileName
  where
    mkHandle f = IO.openFile (f path func size) IO.ReadWriteMode

closeDatabase :: Database -> IO ()
closeDatabase db = do
  IO.hClose $ dbKeysHandle db
  IO.hClose $ dbValuesHandle db

withCreateDatabase :: FilePath -> HashFunction -> Size -> (Database -> IO a) -> IO a
withCreateDatabase path func size =
  Exc.bracket (createDatabase path func size) closeDatabase

withOpenDatabase :: FilePath -> HashFunction -> Size -> (Database -> IO a) -> IO a
withOpenDatabase path func size =
  Exc.bracket (openDatabase path func size) closeDatabase

hashKey :: Database -> Key -> KeyPtr
hashKey db key = hfHash (dbHashFunc db) key (dbSize db)

invalidValuePtr :: ValuePtr
invalidValuePtr = 0

keyFileOffset :: KeyPtr -> Word64
keyFileOffset = (* keyRecordSize)

keyFileRange :: KeyPtr -> FileRange
keyFileRange i = FileRange (keyFileOffset i) keyRecordSize

readFileRange :: IO.Handle -> FileRange -> IO SBS.ByteString
readFileRange handle (FileRange offset size) = do
  IO.hSeek handle IO.AbsoluteSeek $ fromIntegral offset
  SBS.hGet handle $ fromIntegral size

decodeFileRange :: Binary a => IO.Handle -> FileRange -> IO a
decodeFileRange handle rng = decode <$> readFileRange handle rng

writeFileRange :: IO.Handle -> Word64 -> SBS.ByteString -> IO ()
writeFileRange handle offset bs = do
  IO.hSeek handle IO.AbsoluteSeek $ fromIntegral offset
  SBS.hPut handle bs

strictify :: LBS.ByteString -> SBS.ByteString
strictify = SBS.concat . LBS.toChunks

lazify :: SBS.ByteString -> LBS.ByteString
lazify = LBS.fromChunks . (: [])

data ValuePtrRef = ValuePtrRef
  { vprVal :: ValuePtr
  , vprSet :: ValuePtr -> IO ()
  }

hashValuePtrRef :: Database -> Key -> IO ValuePtrRef
hashValuePtrRef db key = do
  valuePtr <-
    decodeFileRange (dbKeysHandle db) $ keyFileRange keyPtr
  return ValuePtrRef
    { vprVal = valuePtr
    , vprSet = writeFileRange (dbKeysHandle db) (keyFileOffset keyPtr) . encode
    }
    where
      keyPtr = hashKey db key

valueKeyRange :: ValuePtr -> ValueHeader -> FileRange
valueKeyRange valuePtr header =
  FileRange (valuePtr + valueHeaderSize) . fromIntegral $ vhKeySize header

valueDataRange :: ValuePtr -> ValueHeader -> FileRange
valueDataRange valuePtr header =
  FileRange (valuePtr + valueHeaderSize + fromIntegral (vhKeySize header)) . fromIntegral $
  vhValueSize header

findKey :: Database -> Key -> IO (Maybe (ValuePtrRef, ValueHeader))
findKey db key =
  find =<< hashValuePtrRef db key
  where
    find valuePtrRef
      | vprVal valuePtrRef == invalidValuePtr = return Nothing
      | otherwise = do
        valueHeader <-
          decodeFileRange (dbValuesHandle db)
          (FileRange (vprVal valuePtrRef) valueHeaderSize)
        vKey <-
          readFileRange (dbValuesHandle db) $
          valueKeyRange (vprVal valuePtrRef) valueHeader
        if key == vKey
          then return $ Just (valuePtrRef, valueHeader)
          else find $ nextCollisionRef (vprVal valuePtrRef) valueHeader
    nextCollisionRef valuePtr valueHeader = ValuePtrRef
        { vprVal = vhNextCollision valueHeader
        , vprSet =
          writeFileRange (dbValuesHandle db)
          valuePtr . encode . flip (atVhNextCollision . const) valueHeader
        }

readKey :: Database -> Key -> IO (Maybe Value)
readKey db key = do
  mValueRange <- findKey db key
  case mValueRange of
    Nothing -> return Nothing
    Just (valuePtrRef, valueHeader) ->
      Just <$>
      readFileRange (dbValuesHandle db)
      (valueDataRange (vprVal valuePtrRef) valueHeader)

pairLengths :: Key -> Value -> (Word32, Word32)
pairLengths key value = (keyLen, valueLen)
  where
    keyLen = fromIntegral $ SBS.length key
    valueLen = fromIntegral $ SBS.length value

appendNewValue :: Database -> ValuePtr -> Key -> Value -> IO ValuePtr
appendNewValue db nextCollision key value = do
  valuePtr <- fromIntegral <$> IO.hFileSize (dbValuesHandle db)
  let
    headerStr = encode ValueHeader
      { vhNextCollision = nextCollision
      , vhAllocSize = keyLen + valueLen
      , vhKeySize = keyLen
      , vhValueSize = valueLen
      }
  writeFileRange (dbValuesHandle db) valuePtr $ mconcat [headerStr, key, value]
  return valuePtr
  where
    (keyLen, valueLen) = pairLengths key value

writeKey :: Database -> Key -> Value -> IO ()
writeKey db key value = do
  mResult <- findKey db key
  case mResult of
    Just (valuePtrRef, valueHeader) ->
      if vhAllocSize valueHeader >= keyLen + valueLen then
        -- re-use existing storage:
        let headerStr = encode valueHeader { vhKeySize = keyLen, vhValueSize = valueLen }
        in writeFileRange (dbValuesHandle db) (vprVal valuePtrRef) $ mconcat [headerStr, key, value]
      else
        -- The old value now becomes unreachable
        setValue valuePtrRef $ vhNextCollision valueHeader
    Nothing -> do
      hashAnchor <- hashValuePtrRef db key
      setValue hashAnchor $ vprVal hashAnchor
  where
    (keyLen, valueLen) = pairLengths key value
    setValue ref nextCollision =
      vprSet ref =<< appendNewValue db nextCollision key value

deleteKey :: Database -> Key -> IO ()
deleteKey db key = do
  mResult <- findKey db key
  case mResult of
    Just (valuePtrRef, valueHeader) ->
      -- The old value now becomes unreachable
      vprSet valuePtrRef $ vhNextCollision valueHeader
    Nothing ->
      -- TODO: Throw exception?
      return ()