module Hunt.Interpreter
(
initHunt
, runCmd
, execCmd
, runHunt
, Hunt
, HuntT (..)
, HuntEnv (..)
, DefHuntEnv
)
where
import Control.Applicative
import Control.Arrow (second)
import Control.Concurrent.XMVar
import Control.Monad.Error
import Control.Monad.Reader
import Data.Aeson (ToJSON (..), object, (.=))
import Data.Binary (Binary, encodeFile)
import qualified Data.ByteString.Lazy as BL
import Data.Default
import qualified Data.List as L
import qualified Data.Map as M
import Data.Set (Set)
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Traversable as TV
import Hunt.Common
import Hunt.Common.ApiDocument as ApiDoc
import qualified Hunt.Common.DocDesc as DocDesc
import qualified Hunt.Common.DocIdSet as DocIdSet
import Hunt.ContextIndex (ContextIndex (..), ContextMap)
import qualified Hunt.ContextIndex as CIx
import Hunt.DocTable (DValue, DocTable)
import qualified Hunt.DocTable as DocTable
import Hunt.DocTable.HashedDocTable
import qualified Hunt.Index as Ix
import Hunt.Index.IndexImpl (IndexImpl (..), mkIndex)
import Hunt.Index.Schema.Analyze
import Hunt.Interpreter.BasicCommand
import Hunt.Interpreter.Command (Command)
import Hunt.Interpreter.Command hiding (Command (..))
import Hunt.Query.Intermediate (ScoredDocs, ScoredWords,
UnScoredDocs, toDocIdSet,
toDocsResult, RankedDoc(..),
toDocumentResultPage,
toWordsResult)
import Hunt.Query.Language.Grammar
import Hunt.Query.Processor (ProcessConfig (..),
initProcessor,
processQueryScoredDocs,
processQueryScoredWords,
processQueryUnScoredDocs)
import Hunt.Query.Ranking
import Hunt.Utility (showText)
import Hunt.Utility.Log
import System.IO.Error (isAlreadyInUseError,
isDoesNotExistError,
isFullError, isPermissionError,
tryIOError)
import qualified System.Log.Logger as Log
import GHC.Stats (getGCStats, getGCStatsEnabled)
import GHC.Stats.Json ()
modName :: String
modName = "Hunt.Interpreter"
debugM :: MonadIO m => String -> m ()
debugM = liftIO . Log.debugM modName
errorM :: MonadIO m => String -> m ()
errorM = liftIO . Log.errorM modName
data HuntEnv dt = HuntEnv
{
huntIndex :: DocTable dt => XMVar (ContextIndex dt)
, huntRankingCfg :: RankConfig (DValue dt)
, huntTypes :: ContextTypes
, huntNormalizers :: [CNormalizer]
, huntQueryCfg :: ProcessConfig
}
type DefHuntEnv = HuntEnv (Documents Document)
initHunt :: DocTable dt => IO (HuntEnv dt)
initHunt = initHuntEnv CIx.empty defaultRankConfig contextTypes normalizers def
contextTypes :: ContextTypes
contextTypes = [ctText, ctInt, ctDate, ctPosition, ctTextSimple, ctPositionRTree]
normalizers :: [CNormalizer]
normalizers = [cnUpperCase, cnLowerCase, cnZeroFill]
initHuntEnv :: DocTable dt
=> ContextIndex dt
-> RankConfig (DValue dt)
-> ContextTypes
-> [CNormalizer]
-> ProcessConfig
-> IO (HuntEnv dt)
initHuntEnv ixx rnk opt ns qc = do
ixref <- newXMVar ixx
return $ HuntEnv ixref rnk opt ns qc
newtype HuntT dt m a
= HuntT { runHuntT :: ReaderT (HuntEnv dt) (ErrorT CmdError m) a }
deriving
(Applicative, Monad, MonadIO, Functor, MonadReader (HuntEnv dt), MonadError CmdError)
instance MonadTrans (HuntT dt) where
lift = HuntT . lift . lift
type Hunt dt = HuntT dt IO
runHunt :: DocTable dt => HuntT dt m a -> HuntEnv dt -> m (Either CmdError a)
runHunt env = runErrorT . runReaderT (runHuntT env)
runCmd :: (DocTable dt, Binary dt) => HuntEnv dt -> Command -> IO (Either CmdError CmdResult)
runCmd env cmd
= runErrorT . runReaderT (runHuntT . execCmd $ cmd) $ env
askIx :: DocTable dt => Hunt dt (ContextIndex dt)
askIx = do
ref <- asks huntIndex
liftIO $ readXMVar ref
modIx :: DocTable dt
=> (ContextIndex dt -> Hunt dt (ContextIndex dt, a)) -> Hunt dt a
modIx f = do
ref <- asks huntIndex
ix <- liftIO $ takeXMVarWrite ref
(i',a) <- f ix `catchError` putBack ref ix
liftIO $ putXMVarWrite ref i'
return a
where
putBack ref i e = do
liftIO $ putXMVarWrite ref i
throwError e
modIxLocked :: DocTable dt
=> (ContextIndex dt -> Hunt dt (ContextIndex dt, a)) -> Hunt dt a
modIxLocked f = do
ref <- asks huntIndex
ix <- liftIO $ takeXMVarLock ref
(i',a) <- f ix `catchError` putBack ref ix
liftIO $ putXMVarLock ref i'
return a
where
putBack ref i e = do
liftIO $ putXMVarLock ref i
throwError e
withIx :: DocTable dt => (ContextIndex dt -> Hunt dt a) -> Hunt dt a
withIx f
= askIx >>= f
askType :: DocTable dt => Text -> Hunt dt ContextType
askType cn = do
ts <- asks huntTypes
case L.find (\t -> cn == ctName t) ts of
Just t -> return t
_ -> throwResError 410 ("used unavailable context type: " `T.append` cn)
askNormalizer :: DocTable dt => Text -> Hunt dt CNormalizer
askNormalizer cn = do
ts <- asks huntNormalizers
case L.find (\t -> cn == cnName t) ts of
Just t -> return t
_ -> throwResError 410 ("used unavailable normalizer: " `T.append` cn)
askIndex :: DocTable dt => Text -> Hunt dt IndexImpl
askIndex cn = ctIxImpl <$> askType cn
throwResError :: DocTable dt => Int -> Text -> Hunt dt a
throwResError n msg
= do errorM $ unwords [show n, T.unpack msg]
throwError $ ResError n msg
execCmd :: (Binary dt, DocTable dt) => Command -> Hunt dt CmdResult
execCmd
= execBasicCmd . toBasicCommand
execBasicCmd :: (Binary dt, DocTable dt) => BasicCommand -> Hunt dt CmdResult
execBasicCmd cmd@(InsertList _) = do
debugM $ "Exec: InsertList [..]"
execCmd' cmd
execBasicCmd cmd = do
debugM $ "Exec: " ++ logShow cmd
execCmd' cmd
execCmd' :: (Binary dt, DocTable dt) => BasicCommand -> Hunt dt CmdResult
execCmd' (Search q offset mx wg fields)
= withIx $ execSearch q offset mx wg fields
execCmd' (Completion q mx)
= withIx $ execCompletion q mx
execCmd' (Select q)
= withIx $ execSelect q
execCmd' (Sequence cs)
= execSequence cs
execCmd' NOOP
= return ResOK
execCmd' (Status sc)
= execStatus sc
execCmd' (InsertList docs)
= modIx $ execInsertList docs
execCmd' (Update doc)
= modIx $ execUpdate doc
execCmd' (DeleteDocs uris)
= modIx $ execDeleteDocs uris
execCmd' (DeleteByQuery q)
= modIx $ execDeleteByQuery q
execCmd' (StoreIx filename)
= withIx $ execStore filename
execCmd' (LoadIx filename)
= execLoad filename
execCmd' (InsertContext cx ct)
= modIx $ execInsertContext cx ct
execCmd' (DeleteContext cx)
= modIx $ execDeleteContext cx
execSequence :: (DocTable dt, Binary dt)=> [BasicCommand] -> Hunt dt CmdResult
execSequence [] = execBasicCmd NOOP
execSequence [c] = execBasicCmd c
execSequence (c : cs) = execBasicCmd c >> execSequence cs
execInsertContext :: DocTable dt
=> Context
-> ContextSchema
-> ContextIndex dt
-> Hunt dt (ContextIndex dt, CmdResult)
execInsertContext cx ct ixx
= do
contextExists <- CIx.hasContextM cx ixx
unless' (not contextExists)
409 $ "context already exists: " `T.append` cx
cType <- askType . ctName . cxType $ ct
impl <- askIndex . ctName . cxType $ ct
norms <- mapM (askNormalizer . cnName) $ cxNormalizer ct
return $ ( CIx.insertContext cx (newIx impl) (newSchema cType norms) ixx
, ResOK
)
where
newIx :: IndexImpl -> IndexImpl
newIx (IndexImpl i) = mkIndex $ Ix.empty `asTypeOf` i
newSchema cType norms= (ct { cxType = cType, cxNormalizer = norms })
execDeleteContext :: DocTable dt
=> Context
-> ContextIndex dt
-> Hunt dt (ContextIndex dt, CmdResult)
execDeleteContext cx ixx
= return (CIx.deleteContext cx ixx, ResOK)
execInsertList :: DocTable dt
=> [ApiDocument] -> ContextIndex dt -> Hunt dt (ContextIndex dt, CmdResult)
execInsertList docs ixx@(ContextIndex ix _dt)
= do
checkContextsExistence contexts ixx
checkDuplicates duplicates
mapM_ (flip (checkApiDocExistence False) ixx) docs
ixx' <- lift $ CIx.insertList docsAndWords ixx
return (ixx', ResOK)
where
contexts
= M.keys
. M.unions
. L.map (M.map (const ()) . adIndex)
$ docs
docsAndWords
= L.map ( (\ (d, _dw, ws) -> (d, ws))
. toDocAndWords (CIx.mapToSchema ix)
. (\ d -> d {adDescr = DocDesc.deleteNull $ adDescr d})
)
$ docs
duplicates
= M.keys
. M.filter (> 1)
. L.foldl ins M.empty
. L.map adUri
$ docs
where
ins m k = M.insertWith (+) k (1::Int) m
checkDuplicates xs
= unless' (L.null xs)
409 $ "duplicate URIs found in document list:" <> showText xs
execUpdate :: DocTable dt
=> ApiDocument -> ContextIndex dt -> Hunt dt (ContextIndex dt, CmdResult)
execUpdate doc ixx@(ContextIndex ix dt)
= do checkContextsExistence contexts ixx
docIdM <- lift $ DocTable.lookupByURI (uri docs) dt
case docIdM of
Just docId
-> do ixx' <- lift
$ CIx.modifyWithDescription (adWght doc) (desc docs) ws docId ixx
return (ixx', ResOK)
Nothing
-> throwResError 409 $ "document for update not found: " `T.append` uri docs
where
contexts
= M.keys $ adIndex doc
(docs, _dw, ws)
= toDocAndWords (CIx.mapToSchema ix) doc
checkContextsExistence :: DocTable dt
=> [Context] -> ContextIndex dt -> Hunt dt ()
checkContextsExistence cs ixx
= do ixxContexts <- S.fromList <$> CIx.contextsM ixx
let docContexts = S.fromList cs
let invalidContexts = S.difference docContexts ixxContexts
unless' (S.null invalidContexts)
409 ( "mentioned context(s) are not present: "
<> (showText . S.toList $ invalidContexts)
)
checkApiDocExistence :: DocTable dt
=> Bool -> ApiDocument -> ContextIndex dt -> Hunt dt ()
checkApiDocExistence switch apidoc ixx
= do let u = adUri apidoc
mem <- CIx.member u ixx
unless' (switch == mem)
409 ( ( if mem
then "document already exists: "
else "document does not exist: "
) <> u
)
execSearch :: DocTable dt =>
Query ->
Int -> Int ->
Bool -> Maybe [Text] ->
ContextIndex dt ->
Hunt dt CmdResult
execSearch q offset mx wg fields (ContextIndex ix dt)
= do debugM ("execSearch: " ++ show q)
cfg <- asks huntQueryCfg
scDocs <- liftHunt $
runQueryScoredDocsM ix cfg q
formatPage <$> toDocsResult dt scDocs
where
formatPage ds
= ResSearch $
LimitedResult
{ lrResult = ds'
, lrOffset = offset
, lrMax = mx
, lrCount = length ds
}
where
ds' = map (mkSelect wg fields)
. toDocumentResultPage offset mx
$ ds
execCompletion :: DocTable dt =>
Query ->
Int ->
ContextIndex dt -> Hunt dt CmdResult
execCompletion q mx (ContextIndex ix _dt)
= do debugM ("execCompletion: " ++ show q)
cfg <- asks huntQueryCfg
scWords <- liftHunt $
runQueryScoredWordsM ix cfg q
return $ ResSuggestion $ toWordsResult mx scWords
execSelect :: DocTable dt => Query -> ContextIndex dt -> Hunt dt CmdResult
execSelect q (ContextIndex ix dt)
= do debugM ("execSelect: " ++ show q)
res <- liftHunt $ runQueryUnScoredDocsM ix queryConfigDocIds q
dt' <- DocTable.restrict (toDocIdSet res) dt
djs <- DocTable.toJSON'DocTable dt'
return $ ResGeneric djs
mkSelect :: Bool -> Maybe [Text] -> (RankedDoc -> RankedDoc)
mkSelect withWeight fields
= mkSelW withWeight . mkSelF fields
where
mkSelW True = id
mkSelW False = RD . second (\d -> d { wght = 1.0 }) . unRD
mkSelF Nothing = id
mkSelF (Just fs) = RD . second (\d -> d {desc = DocDesc.restrict fs (desc d)}) . unRD
execDeleteDocs :: DocTable dt => Set URI -> ContextIndex dt -> Hunt dt (ContextIndex dt, CmdResult)
execDeleteDocs d ix
= do ix' <- lift $ CIx.deleteDocsByURI d ix
return (ix', ResOK)
execDeleteByQuery :: DocTable dt => Query -> ContextIndex dt -> Hunt dt (ContextIndex dt, CmdResult)
execDeleteByQuery q ixx@(ContextIndex ix _dt)
= do debugM ("execDeleteByQuery: " ++ show q)
ds <- toDocIdSet <$>
(liftHunt $ runQueryUnScoredDocsM ix queryConfigDocIds q)
if DocIdSet.null ds
then do debugM "DeleteByQuery: Query result set empty"
return (ixx, ResOK)
else do debugM $ "DeleteByQuery: " ++ show ds
ix' <- lift $ CIx.delete ds ixx
return (ix', ResOK)
execStore :: (Binary a, DocTable dt) =>
FilePath -> a -> Hunt dt CmdResult
execStore filename x = do
res <- liftIO . tryIOError $ encodeFile filename x
case res of
Left e
| isAlreadyInUseError e -> throwResError 409 $ "Cannot store index: file is already in use"
| isPermissionError e -> throwResError 403 $ "Cannot store index: no access permission to file"
| isFullError e -> throwResError 500 $ "Cannot store index: device is full"
| otherwise -> throwResError 500 $ showText $ e
Right _ -> return ResOK
execLoad :: (Binary dt, DocTable dt) => FilePath -> Hunt dt CmdResult
execLoad filename = do
ts <- asks huntTypes
let ix = map ctIxImpl ts
modIxLocked $ \_ -> do
ixh@(ContextIndex ixs _) <- decodeFile' ix filename
ls <- TV.mapM reloadSchema $ CIx.cxMap ixs
return (ixh{ ciIndex = CIx.mkContextMap ls }, ResOK)
where
decodeFile' ts f = do
res <- liftIO . tryIOError $ CIx.decodeCxIx ts <$> BL.readFile f
case res of
Left e
| isAlreadyInUseError e -> throwResError 409 $ "Cannot load index: file already in use"
| isDoesNotExistError e -> throwResError 404 $ "Cannot load index: file does not exist"
| isPermissionError e -> throwResError 403 $ "Cannot load index: no access permission to file"
| otherwise -> throwResError 500 $ showText e
Right r -> return r
reloadSchema (s,ix) = do
cxt <- askType . ctName . cxType $ s
ns <- mapM (askNormalizer . cnName) (cxNormalizer s)
return $ ( s { cxType = cxt
, cxNormalizer = ns
}
, ix )
runQueryScoredDocsM :: ContextMap
-> ProcessConfig
-> Query
-> IO (Either CmdError ScoredDocs)
runQueryScoredDocsM ix cfg q
= processQueryScoredDocs st q
where
st = initProcessor cfg ix
runQueryUnScoredDocsM :: ContextMap
-> ProcessConfig
-> Query
-> IO (Either CmdError UnScoredDocs)
runQueryUnScoredDocsM ix cfg q
= processQueryUnScoredDocs st q
where
st = initProcessor cfg ix
runQueryScoredWordsM :: ContextMap
-> ProcessConfig
-> Query
-> IO (Either CmdError ScoredWords)
runQueryScoredWordsM ix cfg q
= processQueryScoredWords st q
where
st = initProcessor cfg ix
queryConfigDocIds :: ProcessConfig
queryConfigDocIds = ProcessConfig def True 0 0
liftHunt :: IO (Either CmdError r) -> Hunt dt r
liftHunt cmd
= lift cmd >>= either throwError return
execStatus :: DocTable dt => StatusCmd -> Hunt dt CmdResult
execStatus StatusGC
= do
statsEnabled <- liftIO getGCStatsEnabled
if statsEnabled
then (ResGeneric . toJSON) <$>
liftIO getGCStats
else throwResError 501 ("GC stats not enabled. Use `+RTS -T -RTS' to enable them." :: Text)
execStatus StatusDocTable
= withIx dumpDocTable
where
dumpDocTable (ContextIndex _ix dt)
= ResGeneric <$>
DocTable.toJSON'DocTable dt
execStatus (StatusContext cx)
= withIx dumpContext
where
dumpContext (ContextIndex ix _dt)
= (ResGeneric . object . map (uncurry (.=))) <$>
CIx.lookupAllWithCx cx ix
execStatus (StatusIndex )
= withIx _dumpIndex
where
_dumpIndex (ContextIndex _ix _dt)
= throwResError 501 $ "status of Index not yet implemented"
unless' :: DocTable dt
=> Bool -> Int -> Text -> Hunt dt ()
unless' b code text = unless b $ throwResError code text