module Data.Fortune.Index
( Index
, openIndex
, createVirtualIndex
, closeIndex
, getStats
, StatsProblem(..)
, HeaderProblem(..)
, IndexProblem(..)
, checkIndex
, IndexEntry(..)
, indexEntryStats
, getEntries
, getEntry
, unfoldEntries
, appendEntries
, appendEntry
, clearIndex
, rebuildStats
) where
import Control.Applicative
import Control.Concurrent.MVar
import Control.Exception
import Control.Monad
import qualified Data.ByteString as BS
import Data.Foldable (foldMap)
import Data.Fortune.Stats
import Data.Knob
import Data.Maybe
import Data.Semigroup
import Data.Serialize
import Data.Typeable
import qualified Data.Vector as V
import Data.Word
import System.IO
runGetM getThing = either fail return . runGet getThing
magic, currentVersion :: Word32
magic = 0xbdcbcdb
currentVersion = 2
headerLength = 64
headerReservedLength = 28
data Header = Header
{ stats :: !FortuneStats
, indexLoc :: !Int
} deriving (Eq, Show)
emptyHeader = Header mempty headerLength
data HeaderProblem
= BadMagicNumber !Word32
| UnsupportedVersion !Word32
| StatsProblem !StatsProblem
| TableStartsBeforeHeaderEnds
deriving (Eq, Ord, Read, Show, Typeable)
checkHeader (Header stats loc)
| loc < headerLength = Just TableStartsBeforeHeaderEnds
| otherwise = StatsProblem <$> checkStats stats
knownVersions = [(currentVersion, getRestV2)]
getHeader = do
n <- getWord32be
when (n /= magic) $ throw (BadMagicNumber n)
version <- getWord32be
case lookup version knownVersions of
Just getRest -> getRest
Nothing -> throw (UnsupportedVersion version)
getRestV2 = do
indexLoc <- fromIntegral <$> getWord32be
numFortunes <- Sum . fromIntegral <$> getWord32be
maxChars <- Max . fromIntegral <$> getWord32be
minChars <- Min . fromIntegral <$> getWord32be
maxLines <- Max . fromIntegral <$> getWord32be
minLines <- Min . fromIntegral <$> getWord32be
offsetAfter <- Max . fromIntegral <$> getWord32be
skip headerReservedLength
return Header {stats = FortuneStats{..}, ..}
putHeader Header {stats = FortuneStats{..}, ..} = do
putWord32be magic
putWord32be currentVersion
putWord32be (fromIntegral indexLoc)
putWord32be (fromIntegral (getSum numFortunes))
putWord32be (fromIntegral (getMax maxChars))
putWord32be (fromIntegral (getMin minChars))
putWord32be (fromIntegral (getMax maxLines))
putWord32be (fromIntegral (getMin minLines))
putWord32be (fromIntegral (getMax offsetAfter))
replicateM_ headerReservedLength (putWord8 0)
data Index = Index !Handle !(MVar Header)
openIndex :: FilePath -> Bool -> IO Index
openIndex path writeMode = do
file <- openFile path (if writeMode then ReadWriteMode else ReadMode)
openIndex' file writeMode
createVirtualIndex :: IO Index
createVirtualIndex = do
knob <- newKnob BS.empty
file <- newFileHandle knob "<createVirtualIndex>" ReadWriteMode
openIndex' file True
openIndex' :: Handle -> Bool -> IO Index
openIndex' file writeMode = do
hSetBinaryMode file True
hSetBuffering file NoBuffering
isEmpty <- hIsEOF file
when (writeMode && isEmpty) $ do
BS.hPut file (runPut (putHeader emptyHeader))
hSeek file AbsoluteSeek 0
hdr <- BS.hGet file headerLength
case runGet getHeader hdr of
Left err -> fail err
Right hdr -> do
mbProblem <- checkIndex_ file hdr
case mbProblem of
Just (HeaderProblem StatsProblem{}) -> void (rebuildStats_ file hdr)
Just p -> throwIO p
Nothing -> return ()
hdrRef <- newMVar hdr
return (Index file hdrRef)
closeIndex :: Index -> IO ()
closeIndex (Index file mv) = do
hClose file
takeMVar mv
putMVar mv (throw AccessToClosedIndex)
data IndexProblem
= HeaderProblem !HeaderProblem
| TableLongerThanFile
| AccessToClosedIndex
deriving (Eq, Ord, Read, Show, Typeable)
instance Exception StatsProblem where
fromException se@(SomeException e) = listToMaybe $ catMaybes
[ cast e
, do StatsProblem p <- fromException se; return p
]
instance Exception HeaderProblem where
fromException se@(SomeException e) = listToMaybe $ catMaybes
[ cast e
, StatsProblem <$> fromException se
, do HeaderProblem p <- fromException se; return p
]
instance Exception IndexProblem where
fromException se@(SomeException e) = listToMaybe $ catMaybes
[ cast e
, HeaderProblem <$> fromException se
]
checkIndex :: Index -> IO (Maybe IndexProblem)
checkIndex (Index file hdrRef) =
either Just id <$> try (withMVar hdrRef (checkIndex_ file))
checkIndex_ file hdr =
case checkHeader hdr of
Just problem -> return (Just (HeaderProblem problem))
Nothing -> do
let base = indexLoc hdr
count = numFortunes (stats hdr)
end = base + getSum count * indexEntryLength
len <- hFileSize file
return $! if len < toInteger end
then Just TableLongerThanFile
else Nothing
withIndex ix@(Index file hdrRef) action = withMVar hdrRef $ \hdr -> do
let base = indexLoc hdr
count = numFortunes (stats hdr)
res <- action file base (getSum count)
checkIndex_ file hdr >>= maybe (return res) throwIO
modifyHeader (Index file hdrRef) action = modifyMVar_ hdrRef $ \hdr -> do
newHdr <- action file hdr
when (newHdr /= hdr) $ do
hSeek file AbsoluteSeek 0
BS.hPut file (runPut (putHeader newHdr))
checkIndex_ file newHdr >>= maybe (return newHdr) throwIO
getStats :: Index -> IO FortuneStats
getStats (Index _ hdrRef) = stats <$> readMVar hdrRef
indexEntryLength = 16
data IndexEntry = IndexEntry
{ stringOffset :: !Int
, stringBytes :: !Int
, stringChars :: !Int
, stringLines :: !Int
} deriving (Eq, Ord, Show)
indexEntryStats :: IndexEntry -> FortuneStats
indexEntryStats (IndexEntry o n cs ls) = FortuneStats
{ numFortunes = Sum 1, offsetAfter = Max (o + n)
, minChars = Min cs, maxChars = Max cs
, minLines = Min ls, maxLines = Max ls
}
putIndexEntry IndexEntry{..} = do
putWord32be (fromIntegral stringOffset)
putWord32be (fromIntegral stringBytes)
putWord32be (fromIntegral stringChars)
putWord32be (fromIntegral stringLines)
getIndexEntry = do
stringOffset <- fromIntegral <$> getWord32be
stringBytes <- fromIntegral <$> getWord32be
stringChars <- fromIntegral <$> getWord32be
stringLines <- fromIntegral <$> getWord32be
return IndexEntry{..}
getEntries :: Index -> IO (V.Vector IndexEntry)
getEntries ix = withIndex ix $ \file base count -> do
hSeek file AbsoluteSeek (toInteger base)
buf <- BS.hGet file (count * indexEntryLength)
runGetM (V.replicateM count getIndexEntry) buf
getEntry :: Index -> Int -> IO IndexEntry
getEntry ix@(Index file hdrRef) i
| i < 0 = rangeErr
| otherwise = withIndex ix $ \file base count -> do
when (i >= count) rangeErr
hSeek file AbsoluteSeek (toInteger (base + i * indexEntryLength))
BS.hGet file indexEntryLength >>= runGetM getIndexEntry
where rangeErr = fail ("getEntry: index out of range: " ++ show i)
unfoldEntries :: Index -> IO (Maybe IndexEntry) -> IO ()
unfoldEntries ix getEntry = modifyHeader ix $ \file hdr -> do
let base = indexLoc hdr
count = numFortunes (stats hdr)
end = base + getSum count * indexEntryLength
loop s = do
mbEntry <- getEntry
case mbEntry of
Nothing -> return s
Just entry -> do
BS.hPut file (runPut (putIndexEntry entry))
loop $! (s <> indexEntryStats entry)
hSeek file AbsoluteSeek (toInteger end)
newStats <- loop (stats hdr)
return hdr {stats = newStats}
appendEntries :: Index -> V.Vector IndexEntry -> IO ()
appendEntries ix entries
| V.null entries = return ()
| otherwise = modifyHeader ix $ \file hdr -> do
let base = indexLoc hdr
count = numFortunes (stats hdr)
end = base + getSum count * indexEntryLength
hSeek file AbsoluteSeek (toInteger end)
BS.hPut file (runPut (V.mapM_ putIndexEntry entries))
return hdr {stats = stats hdr <> foldMap indexEntryStats entries}
appendEntry :: Index -> IndexEntry -> IO ()
appendEntry ix = appendEntries ix . V.singleton
clearIndex :: Index -> IO ()
clearIndex ix = modifyHeader ix $ \file _ -> do
hSetFileSize file (toInteger headerLength)
return emptyHeader
rebuildStats :: Index -> IO ()
rebuildStats ix = modifyHeader ix rebuildStats_
rebuildStats_ file hdr = do
let n = getSum (numFortunes (stats hdr))
chunk = 4096 `div` indexEntryLength
loop i s
| i >= n = return s
| otherwise = do
let m = min chunk (n i)
entries <- runGetM (replicateM m getIndexEntry) =<< BS.hGet file (m * indexEntryLength)
loop (i + chunk) (s <> foldMap indexEntryStats entries)
hSeek file AbsoluteSeek (toInteger (indexLoc hdr))
newStats <- loop 0 mempty
return hdr {stats = newStats}