module Hunt.Query.Processor
( processQueryScoredDocs
, processQueryUnScoredDocs
, processQueryScoredWords
, initProcessor
, ProcessConfig (..)
, ProcessEnv
)
where
import Control.Applicative
import Control.Monad.Error
import Control.Monad.Reader
import Data.Binary (Binary)
import qualified Data.Binary as Bin
import Data.Default
import qualified Data.List as L
import qualified Data.Map as M
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Hunt.Common
import Hunt.ContextIndex (ContextMap)
import qualified Hunt.ContextIndex as CIx
import Hunt.Interpreter.Command (CmdError (..))
import Hunt.Query.Fuzzy (FuzzyConfig)
import Hunt.Query.Intermediate
import Hunt.Query.Language.Grammar
import Hunt.Utility (showText)
import qualified System.Log.Logger as Log
modName :: String
modName = "Hunt.Query.Processor"
debugM :: String -> IO ()
debugM = Log.debugM modName
data ProcessConfig
= ProcessConfig
{ fuzzyConfig :: ! FuzzyConfig
, optimizeQuery :: ! Bool
, wordLimit :: ! Int
, docLimit :: ! Int
}
instance Default ProcessConfig where
def = ProcessConfig def False 100 500
instance Binary ProcessConfig where
put (ProcessConfig fc o l d)
= Bin.put fc >> Bin.put o >> Bin.put l >> Bin.put d
get
= ProcessConfig <$> Bin.get <*> Bin.get <*> Bin.get <*> Bin.get
data ProcessEnv
= ProcessEnv
{ psConfig :: ! ProcessConfig
, psContexts :: ! [Context]
, psIndex :: ContextMap
}
type QueryIndex
= ContextMap
newtype ProcessorT m a
= PT { runProcessor :: ReaderT ProcessEnv (ErrorT CmdError m) a
}
deriving (Applicative, Monad, MonadIO, Functor, MonadReader ProcessEnv, MonadError CmdError)
instance MonadTrans ProcessorT where
lift = PT . lift . lift
type Processor
= ProcessorT IO
queryError :: Int -> Text -> Processor a
queryError n msg
= throwError $ ResError n msg
getContexts :: Processor [Context]
getContexts
= asks psContexts
getIx :: Processor QueryIndex
getIx
= asks psIndex
getSchema :: Processor Schema
getSchema
= getIx >>= return . (M.map fst) . CIx.cxMap
getContextSchema :: Context -> Processor ContextSchema
getContextSchema c
= do schema <- getSchema
case M.lookup c schema of
Just cs -> return cs
_ -> queryError 420 ("Context does not exist in schema: " <> c)
normQueryCx :: Context -> Text -> Processor (Maybe Text)
normQueryCx c t
= do s <- getContextSchema c
if (validate . ctValidate . cxType $ s) t
then
do liftIO . debugM . debugMsg $ s
return . Just . norm $ s
else
return Nothing
where
norm s
= normalize' (cxNormalizer s) t
debugMsg s
= T.unpack $ T.concat [ "query normalizer: ", c, ": [", t, "=>", norm s, "]"]
initProcessor :: ProcessConfig -> QueryIndex -> ProcessEnv
initProcessor cfg ix
= ProcessEnv cfg cxs ix
where
s = CIx.mapToSchema ix
cxs = filter (\c -> fromMaybe False $ M.lookup c s >>= return . cxDefault)
$ CIx.contexts ix
processQueryUnScoredDocs :: ProcessEnv -> Query -> IO (Either CmdError UnScoredDocs)
processQueryUnScoredDocs = processQueryScoredResult evalUnScoredDocs
evalUnScoredDocs :: Query -> Processor UnScoredDocs
evalUnScoredDocs q
| isPrimaryQuery q
= do res <- forallCx (evalPrimary q)
aggregateToScoredResult res
evalUnScoredDocs (QRange lb ub)
= do res <- forallCx (evalRange lb ub)
aggregateToScoredResult res
evalUnScoredDocs (QSeq op qs)
| isLocalCxOp op
= do res <- forallCxLocal
( evalSeq' op
<$> mapM evalScoredRawDocs qs )
aggregateToScoredResult res
| otherwise
= evalSeq op
<$> mapM evalUnScoredDocs qs
evalUnScoredDocs (QContext cxs q)
= withCxs cxs $ evalUnScoredDocs q
evalUnScoredDocs (QBoost _w q)
= evalUnScoredDocs q
evalUnScoredDocs q@QPhrase{}
= normQuery q >>= evalUnScoredDocs
evalUnScoredDocs q@QBinary{}
= normQuery q >>= evalUnScoredDocs
evalUnScoredDocs q
= queryEvalError q
processQueryScoredDocs :: ProcessEnv -> Query -> IO (Either CmdError ScoredDocs)
processQueryScoredDocs = processQueryScoredResult evalScoredDocs'
evalScoredDocs :: Query -> Processor ScoredDocs
evalScoredDocs q
| isPrimaryQuery q
= do res <- forallCx (evalPrimary q)
aggregateToScoredResult res
evalScoredDocs (QRange lb ub)
= do res <- forallCx (evalRange lb ub)
aggregateToScoredResult res
evalScoredDocs (QSeq op qs)
| isLocalCxOp op
= do res <- forallCxLocal
( evalSeq' op
<$> mapM evalScoredRawDocs' qs )
aggregateToScoredResult res
| otherwise
= evalSeq op
<$> mapM evalScoredDocs' qs
evalScoredDocs (QContext cxs q)
= withCxs cxs $ evalScoredDocs' q
evalScoredDocs (QBoost w q)
= boost w <$> evalScoredDocs' q
evalScoredDocs q@QPhrase{}
= normQuery q >>= evalScoredDocs
evalScoredDocs q@QBinary{}
= normQuery q >>= evalScoredDocs
evalScoredDocs q
= queryEvalError q
evalScoredDocs' :: Query -> Processor ScoredDocs
evalScoredDocs' = evalScoredDocs
processQueryScoredWords :: ProcessEnv -> Query -> IO (Either CmdError ScoredWords)
processQueryScoredWords = processQueryScoredResult evalScoredWords'
evalScoredWords :: Query -> Processor ScoredWords
evalScoredWords q
| isPrimaryQuery q
= do res <- forallCx (evalPrimary q)
aggregateToScoredResult res
evalScoredWords (QRange lb ub)
= do res <- forallCx (evalRange lb ub)
aggregateToScoredResult res
evalScoredWords (QSeq Or qs)
= evalScoredWords' (last qs)
evalScoredWords (QSeq AndNot qs)
= evalScoredWords' (last qs)
evalScoredWords (QSeq And qs)
= do docs <- evalUnScoredDocs (mkQ $ init qs)
res <- evalScoredRawDocs ( last qs)
aggregateToScoredResult
$ fmap (filterByDocSet docs) res
where
mkQ [q'] = q'
mkQ qs' = QSeq And qs'
evalScoredWords (QSeq op qs)
| isLocalCxOp op
= do res <- forallCxLocal
( evalSeq' op
<$> mapM evalScoredRawDocs' qs )
aggregateToScoredResult res
evalScoredWords (QContext cxs q)
= withCxs cxs $ evalScoredWords' q
evalScoredWords (QBoost w q)
= boost w <$> evalScoredWords' q
evalScoredWords q@QPhrase{}
= normPhraseQuery q >>= evalScoredWords
evalScoredWords q@QBinary{}
= normQuery q >>= evalScoredWords
evalScoredWords q
= queryEvalError q
evalScoredWords' :: Query -> Processor ScoredWords
evalScoredWords' = evalScoredWords
evalScoredRawDocs :: Query -> Processor (ScoredCx ScoredRawDocs)
evalScoredRawDocs q
| isPrimaryQuery q
= forallCx (evalPrimary q)
evalScoredRawDocs (QRange lb ub)
= forallCx (evalRange lb ub)
evalScoredRawDocs (QSeq op qs)
= evalSeq' op
<$> mapM evalScoredRawDocs' qs
evalScoredRawDocs (QContext cxs q)
= restrictCxs cxs $ evalScoredRawDocs' q
evalScoredRawDocs (QBoost w q)
= boost w <$> evalScoredRawDocs' q
evalScoredRawDocs q@QPhrase{}
= normQuery q >>= evalScoredRawDocs
evalScoredRawDocs q@QBinary{}
= normQuery q >>= evalScoredRawDocs
evalScoredRawDocs q
= queryEvalError q
evalScoredRawDocs' :: Query -> Processor (ScoredCx ScoredRawDocs)
evalScoredRawDocs' = evalScoredRawDocs
processQueryScoredResult :: (q -> Processor r) -> ProcessEnv -> q -> IO (Either CmdError r)
processQueryScoredResult eval st q
= runErrorT . runReaderT (runProcessor $ eval q) $ st
normQuery :: Query -> Processor Query
normQuery (QPhrase op w)
= return . QSeq Phrase . L.map (QFullWord op) $ T.words w
normQuery q@(QBinary op _q1 _q2)
= return . QSeq op . collect op q $ []
where
collect
| isAssocOp op = collectAssoc
| isLeftAssocOp op = collectLeftAssoc
| otherwise = \ _op q' -> (q':)
normQuery q
= return q
normPhraseQuery :: Query -> Processor Query
normPhraseQuery (QPhrase op w)
= return . QSeq Phrase $ normPhrase (T.words w) []
where
normPhrase (x:[]) r = r ++ [QWord op x]
normPhrase (x:xs) r = normPhrase xs $
r ++ [QFullWord op x]
normPhrase [] r = r
normPhraseQuery q
= return q
collectAssoc :: BinOp -> Query -> [Query] -> [Query]
collectAssoc op (QBinary op' q1 q2)
| op == op'
= collectAssoc op q1 . collectAssoc op q2
collectAssoc _ q
= (q :)
collectLeftAssoc :: BinOp -> Query -> [Query] -> [Query]
collectLeftAssoc op (QBinary op' q1 q2)
| op == op'
= collectAssoc op q1 . (q2 :)
collectLeftAssoc _ q
= (q :)
isAssocOp :: BinOp -> Bool
isAssocOp AndNot = False
isAssocOp _ = True
isLeftAssocOp :: BinOp -> Bool
isLeftAssocOp = not . isAssocOp
isLocalCxOp :: BinOp -> Bool
isLocalCxOp Phrase = True
isLocalCxOp Follow{} = True
isLocalCxOp Near{} = True
isLocalCxOp _ = False
evalSeq :: (ScoredResult r) => BinOp -> [r] -> r
evalSeq Or = evalOr
evalSeq And = evalAnd
evalSeq AndNot = evalAndNot
evalSeq _op = const mempty
evalSeq' :: BinOp -> [ScoredCx ScoredRawDocs] -> ScoredCx ScoredRawDocs
evalSeq' Phrase = evalSequence id
evalSeq' (Follow d) = evalFollow id d
evalSeq' (Near d) = evalNear id d
evalSeq' op = evalSeq op
restrictCxs :: [Context] -> Processor r -> Processor r
restrictCxs cxs p
= do cxs0 <- getContexts
withCxs cxs $
local (setCx $ cxs0 `L.intersect` L.nub cxs) p
where
setCx cxs' cfg
= cfg {psContexts = cxs'}
withCxs :: [Context] -> Processor r -> Processor r
withCxs cxs p
= do icxs <- invalidContexts <$> getSchema
if L.null icxs
then local setCx $ p
else queryError 404
$ "mentioned context(s) do not exist: "
<> showText icxs
where
invalidContexts sc
= filter (\ c -> not (c `M.member` sc)) cxs
setCx cfg
= cfg {psContexts = L.nub cxs}
forallCx :: (ScoredResult r) => (Context -> Processor r) -> Processor r
forallCx action
= do cxs <- getContexts
res <- mapM action cxs
return $ mconcat res
forallCxLocal :: (ScoredResult r) => Processor r -> Processor r
forallCxLocal action
= do cxs <- getContexts
res <- mapM f' cxs
return $ mconcat res
where
f' cx
= withCxs [cx] action
isPrimaryQuery :: Query -> Bool
isPrimaryQuery QWord{} = True
isPrimaryQuery QFullWord{} = True
isPrimaryQuery _ = False
queryEvalError :: Query -> Processor r
queryEvalError q
= queryError 501 $ "Hunt.Query.Processor: query can't be evaluated " <> showText q
aggregateToScoredResult :: (Aggregate a (ScoredCx b), ScoredResult b) =>
a -> ProcessorT IO b
aggregateToScoredResult res
= do cxScores <- contextWeights <$> getSchema
return $ boostAndAggregateCx cxScores (aggregate res)
evalPrimary :: Query -> Context -> Processor (ScoredCx ScoredRawDocs)
evalPrimary (QWord QCase w) cx
= searchCx PrefixCase w cx
evalPrimary (QWord QNoCase w) cx
= searchCx PrefixNoCase w cx
evalPrimary (QWord QFuzzy w) cx
= searchCx PrefixNoCase w cx
evalPrimary (QFullWord QCase w) cx
= searchCx Case w cx
evalPrimary (QFullWord QNoCase w) cx
= searchCx NoCase w cx
evalPrimary (QFullWord QFuzzy w) cx
= searchCx NoCase w cx
evalPrimary q _cx
= queryError 501 $ "evalPrimary: not a primary query: " <> showText q
searchCx :: TextSearchOp -> Word -> Context -> Processor (ScoredCx ScoredRawDocs)
searchCx op w' cx
= do mw <- normQueryCx cx w'
case mw of
Nothing -> return $ fromCxRawResults []
(Just w) -> searchCx' w
where
searchCx' w
= do
limit <- asks (docLimit . psConfig)
ix <- getIx
rawr <- limitRawResult limit
<$> CIx.searchWithCxSc op cx w ix
return $ fromCxRawResults [(cx, rawr)]
evalRange :: Word -> Word -> Context -> Processor (ScoredCx ScoredRawDocs)
evalRange lb0 ub0 cx
= do mlb <- normQueryCx cx lb0
mub <- normQueryCx cx ub0
case (mlb, mub) of
(Just lb, Just ub) -> evalRange' lb ub
_ -> return $ fromCxRawResults []
where
evalRange' lb ub
= do
limit <- asks (docLimit . psConfig)
ix <- getIx
rawr <- limitRawResult limit
<$> CIx.lookupRangeCxSc cx lb ub ix
return $ fromCxRawResults [(cx, rawr)]