module Database.KeyValueHash
( Key, Value
, Size, mkSize, sizeLinear
, HashFunction, stdHash, mkHashFunc
, Database, createDatabase, openDatabase, closeDatabase
, withCreateDatabase, withOpenDatabase
, readKey, writeKey, deleteKey
) where
import Control.Applicative ((<$>), (<*>))
import Control.Monad (when)
import Data.Binary (Binary(..))
import Data.Binary.Get (runGet)
import Data.Binary.Put (runPut)
import Data.Derive.Binary(makeBinary)
import Data.DeriveTH(derive)
import Data.Hashable (Hashable, hash)
import Data.Monoid (mconcat)
import Data.Typeable (Typeable)
import Data.Word (Word8, Word32, Word64)
import System.FilePath ((</>))
import qualified Control.Exception as Exc
import qualified Data.ByteString as SBS
import qualified Data.ByteString.Lazy as LBS
import qualified System.Directory as Directory
import qualified System.IO as IO
type Key = SBS.ByteString
type Value = SBS.ByteString
newtype Size = Size {
sizeLog :: Word8
}
sizeLinear :: Size -> Word64
sizeLinear = (2^) . sizeLog
instance Show Size where
show (Size x) = "sz" ++ show x
mkSize :: Word64 -> Size
mkSize = Size . (ceiling :: Double -> Word8) . logBase 2 . fromIntegral
data HashFunction = HashFunction
{ hfHash :: Key -> Size -> Word64
, hfName :: String
}
cap :: Word64 -> Size -> Word64
cap val size = val `mod` (sizeLinear size)
mkHashFunc :: String -> (Key -> Word64) -> HashFunction
mkHashFunc name f = HashFunction
{ hfName = name
, hfHash = cap . f
}
stdHash :: HashFunction
stdHash = mkHashFunc "Hashable" (fromIntegral . hash)
data Database = Database
{ _dbPath :: FilePath
, dbSize :: Size
, dbHashFunc :: HashFunction
, dbKeysHandle :: IO.Handle
, dbValuesHandle :: IO.Handle
}
data FileRange = FileRange
{ _frOffset :: Word64
, _frSize :: Word64
}
derive makeBinary ''FileRange
type ValuePtr = Word64
type KeyPtr = Word64
type KeyRecord = ValuePtr
data ValueHeader = ValueHeader
{ vhNextCollision :: ValuePtr
, vhAllocSize :: Word32
, vhKeySize :: Word32
, vhValueSize :: Word32
}
derive makeBinary ''ValueHeader
atVhNextCollision :: (ValuePtr -> ValuePtr) -> ValueHeader -> ValueHeader
atVhNextCollision f v = v { vhNextCollision = f (vhNextCollision v) }
encode :: Binary a => a -> SBS.ByteString
encode = strictify . runPut . put
decode :: Binary a => SBS.ByteString -> a
decode = runGet get . lazify
binaryPutSize :: Binary a => a -> Word64
binaryPutSize = fromIntegral . SBS.length . encode
keyRecordSize :: Word64
keyRecordSize = binaryPutSize (0 :: KeyRecord)
valueHeaderSize :: Word64
valueHeaderSize = binaryPutSize $ ValueHeader 0 0 0 0
makeFileName :: String -> FilePath -> HashFunction -> Size -> FilePath
makeFileName prefix path func size =
path </> concat [prefix, hfName func, "_", show size]
keysFileName :: FilePath -> HashFunction -> Size -> FilePath
keysFileName = makeFileName "keys"
valuesFileName :: FilePath -> HashFunction -> Size -> FilePath
valuesFileName = makeFileName "values"
data FileAlreadyExists = FileAlreadyExists String deriving (Show, Typeable)
instance Exc.Exception FileAlreadyExists
assertNotExists :: FilePath -> IO ()
assertNotExists fileName = do
de <- Directory.doesDirectoryExist fileName
fe <- Directory.doesFileExist fileName
when (de || fe) $ Exc.throwIO (FileAlreadyExists fileName)
createDatabase :: FilePath -> HashFunction -> Size -> IO Database
createDatabase path func size = do
Directory.createDirectory path
keysHandle <- mkHandle keysFileName
IO.hSetFileSize keysHandle . fromIntegral $ sizeLinear size * keyRecordSize
valuesHandle <- mkHandle valuesFileName
SBS.hPut valuesHandle $ SBS.replicate 16 0
return $ Database path size func keysHandle valuesHandle
where
mkHandle f = do
let fileName = f path func size
assertNotExists fileName
IO.openFile fileName IO.ReadWriteMode
openDatabase :: FilePath -> HashFunction -> Size -> IO Database
openDatabase path func size =
Database path size func <$> mkHandle keysFileName <*> mkHandle valuesFileName
where
mkHandle f = IO.openFile (f path func size) IO.ReadWriteMode
closeDatabase :: Database -> IO ()
closeDatabase db = do
IO.hClose $ dbKeysHandle db
IO.hClose $ dbValuesHandle db
withCreateDatabase :: FilePath -> HashFunction -> Size -> (Database -> IO a) -> IO a
withCreateDatabase path func size =
Exc.bracket (createDatabase path func size) closeDatabase
withOpenDatabase :: FilePath -> HashFunction -> Size -> (Database -> IO a) -> IO a
withOpenDatabase path func size =
Exc.bracket (openDatabase path func size) closeDatabase
hashKey :: Database -> Key -> KeyPtr
hashKey db key = hfHash (dbHashFunc db) key (dbSize db)
invalidValuePtr :: ValuePtr
invalidValuePtr = 0
keyFileOffset :: KeyPtr -> Word64
keyFileOffset = (* keyRecordSize)
keyFileRange :: KeyPtr -> FileRange
keyFileRange i = FileRange (keyFileOffset i) keyRecordSize
readFileRange :: IO.Handle -> FileRange -> IO SBS.ByteString
readFileRange handle (FileRange offset size) = do
IO.hSeek handle IO.AbsoluteSeek $ fromIntegral offset
SBS.hGet handle $ fromIntegral size
decodeFileRange :: Binary a => IO.Handle -> FileRange -> IO a
decodeFileRange handle rng = decode <$> readFileRange handle rng
writeFileRange :: IO.Handle -> Word64 -> SBS.ByteString -> IO ()
writeFileRange handle offset bs = do
IO.hSeek handle IO.AbsoluteSeek $ fromIntegral offset
SBS.hPut handle bs
strictify :: LBS.ByteString -> SBS.ByteString
strictify = SBS.concat . LBS.toChunks
lazify :: SBS.ByteString -> LBS.ByteString
lazify = LBS.fromChunks . (: [])
data ValuePtrRef = ValuePtrRef
{ vprVal :: ValuePtr
, vprSet :: ValuePtr -> IO ()
}
hashValuePtrRef :: Database -> Key -> IO ValuePtrRef
hashValuePtrRef db key = do
valuePtr <-
decodeFileRange (dbKeysHandle db) $ keyFileRange keyPtr
return ValuePtrRef
{ vprVal = valuePtr
, vprSet = writeFileRange (dbKeysHandle db) (keyFileOffset keyPtr) . encode
}
where
keyPtr = hashKey db key
valueKeyRange :: ValuePtr -> ValueHeader -> FileRange
valueKeyRange valuePtr header =
FileRange (valuePtr + valueHeaderSize) . fromIntegral $ vhKeySize header
valueDataRange :: ValuePtr -> ValueHeader -> FileRange
valueDataRange valuePtr header =
FileRange (valuePtr + valueHeaderSize + fromIntegral (vhKeySize header)) . fromIntegral $
vhValueSize header
findKey :: Database -> Key -> IO (Maybe (ValuePtrRef, ValueHeader))
findKey db key =
find =<< hashValuePtrRef db key
where
find valuePtrRef
| vprVal valuePtrRef == invalidValuePtr = return Nothing
| otherwise = do
valueHeader <-
decodeFileRange (dbValuesHandle db)
(FileRange (vprVal valuePtrRef) valueHeaderSize)
vKey <-
readFileRange (dbValuesHandle db) $
valueKeyRange (vprVal valuePtrRef) valueHeader
if key == vKey
then return $ Just (valuePtrRef, valueHeader)
else find $ nextCollisionRef (vprVal valuePtrRef) valueHeader
nextCollisionRef valuePtr valueHeader = ValuePtrRef
{ vprVal = vhNextCollision valueHeader
, vprSet =
writeFileRange (dbValuesHandle db)
valuePtr . encode . flip (atVhNextCollision . const) valueHeader
}
readKey :: Database -> Key -> IO (Maybe Value)
readKey db key = do
mValueRange <- findKey db key
case mValueRange of
Nothing -> return Nothing
Just (valuePtrRef, valueHeader) ->
Just <$>
readFileRange (dbValuesHandle db)
(valueDataRange (vprVal valuePtrRef) valueHeader)
pairLengths :: Key -> Value -> (Word32, Word32)
pairLengths key value = (keyLen, valueLen)
where
keyLen = fromIntegral $ SBS.length key
valueLen = fromIntegral $ SBS.length value
appendNewValue :: Database -> ValuePtr -> Key -> Value -> IO ValuePtr
appendNewValue db nextCollision key value = do
valuePtr <- fromIntegral <$> IO.hFileSize (dbValuesHandle db)
let
headerStr = encode ValueHeader
{ vhNextCollision = nextCollision
, vhAllocSize = keyLen + valueLen
, vhKeySize = keyLen
, vhValueSize = valueLen
}
writeFileRange (dbValuesHandle db) valuePtr $ mconcat [headerStr, key, value]
return valuePtr
where
(keyLen, valueLen) = pairLengths key value
writeKey :: Database -> Key -> Value -> IO ()
writeKey db key value = do
mResult <- findKey db key
case mResult of
Just (valuePtrRef, valueHeader) ->
if vhAllocSize valueHeader >= keyLen + valueLen then
let headerStr = encode valueHeader { vhKeySize = keyLen, vhValueSize = valueLen }
in writeFileRange (dbValuesHandle db) (vprVal valuePtrRef) $ mconcat [headerStr, key, value]
else
setValue valuePtrRef $ vhNextCollision valueHeader
Nothing -> do
hashAnchor <- hashValuePtrRef db key
setValue hashAnchor $ vprVal hashAnchor
where
(keyLen, valueLen) = pairLengths key value
setValue ref nextCollision =
vprSet ref =<< appendNewValue db nextCollision key value
deleteKey :: Database -> Key -> IO ()
deleteKey db key = do
mResult <- findKey db key
case mResult of
Just (valuePtrRef, valueHeader) ->
vprSet valuePtrRef $ vhNextCollision valueHeader
Nothing ->
return ()