module Hunt.Query.Intermediate
( ScoredResult(..)
, Aggregate(..)
, ScoredDocs
, ScoredWords
, ScoredContexts
, ScoredOccs
, ScoredRawDocs
, ScoredCx
, UnScoredDocs
, RankedDoc (..)
, toScoredDocs
, boostAndAggregateCx
, fromCxRawResults
, fromRawResult
, limitRawResult
, limitCxRawResults
, contextWeights
, filterByDocSet
, toDocIdSet
, toDocsResult
, toDocumentResultPage
, toWordsResult
, evalSequence
, evalFollow
, evalNear
, evalOr
, evalAnd
, evalAndNot
, evalBoost
, evalPrim
)
where
import Prelude hiding (null)
import Control.Applicative hiding (empty)
import Control.Arrow (second, (***))
import qualified Data.LimitedPriorityQueue as Q
import qualified Data.List as L
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe
import Data.Aeson
import qualified Data.HashMap.Strict as HM
import Hunt.Query.Result hiding (null)
import Hunt.Common
import qualified Hunt.Common.DocIdMap as DM
import qualified Hunt.Common.DocIdSet as DS
import Hunt.Common.Document (DocumentWrapper (..))
import qualified Hunt.Common.Occurrences as Occ
import qualified Hunt.Common.Positions as Pos
import Hunt.DocTable (DocTable)
import qualified Hunt.DocTable as Dt
class Monoid a => ScoredResult a where
boost :: Score -> a -> a
nullSC :: a -> Bool
differenceSC :: a -> a -> a
intersectSC :: a -> a -> a
intersectDisplSC :: Int -> a -> a -> a
intersectFuzzySC :: Int -> Int -> a -> a -> a
newtype ScoredDocs
= SDS (DocIdMap Score)
deriving (Show)
instance Monoid ScoredDocs where
mempty
= SDS DM.empty
mappend (SDS m1) (SDS m2)
= SDS $ DM.unionWith (<>) m1 m2
instance ScoredResult ScoredDocs where
boost b (SDS m1)
= SDS $ DM.map (b *) m1
nullSC (SDS m1)
= DM.null m1
differenceSC (SDS m1) (SDS m2)
= SDS $ DM.difference m1 m2
intersectSC (SDS m1) (SDS m2)
= SDS $ DM.intersectionWith (+) m1 m2
intersectDisplSC _disp
= intersectSC
intersectFuzzySC _lb _ub
= intersectSC
toScoredDocs :: Occurrences -> ScoredDocs
toScoredDocs os
= SDS $ DM.map toScore os
where
toScore = mkScore . fromIntegral . Pos.size
newtype UnScoredDocs
= UDS DS.DocIdSet
deriving (Show, Monoid)
instance ScoredResult UnScoredDocs where
boost _b uds
= uds
nullSC (UDS s)
= DS.null s
differenceSC (UDS s1) (UDS s2)
= UDS $ DS.difference s1 s2
intersectSC (UDS s1) (UDS s2)
= UDS $ DS.intersection s1 s2
intersectDisplSC _disp
= intersectSC
intersectFuzzySC _lb _ub
= intersectSC
toUnScoredDocs :: Occurrences -> UnScoredDocs
toUnScoredDocs os
= UDS . DS.fromList . DM.keys $ os
toDocIdSet :: UnScoredDocs -> DS.DocIdSet
toDocIdSet (UDS ds) = ds
newtype ScoredWords
= SWS (Map Word Score)
deriving (Show)
type ScoredContexts = ScoredWords
instance Monoid ScoredWords where
mempty
= SWS M.empty
mappend (SWS m1) (SWS m2)
= SWS $ M.unionWith (<>) m1 m2
instance ScoredResult ScoredWords where
boost b (SWS m1)
= SWS $ M.map (b *) m1
nullSC (SWS m1)
= M.null m1
differenceSC (SWS m1) (SWS m2)
= SWS $ M.difference m1 m2
intersectSC (SWS m1) (SWS m2)
= SWS $ M.intersectionWith (+) m1 m2
intersectDisplSC _disp
= intersectSC
intersectFuzzySC _lb _ub
= intersectSC
data ScoredOccs
= SCO Score Occurrences
deriving (Show)
instance Monoid ScoredOccs where
mempty
= SCO defScore DM.empty
mappend (SCO s1 d1) (SCO s2 d2)
= SCO ((s1 + s2) / 2.0) (DM.unionWith Pos.union d1 d2)
instance ScoredResult ScoredOccs where
boost b (SCO s d)
= SCO (b * s) d
nullSC (SCO _s d)
= DM.null d
differenceSC (SCO s1 d1) (SCO _s2 d2)
= SCO s1 (DM.difference d1 d2)
intersectSC (SCO s1 d1) (SCO s2 d2)
= SCO (s1 + s2) (Occ.intersectOccurrences Pos.union d1 d2)
intersectDisplSC disp (SCO s1 d1) (SCO s2 d2)
= SCO (s1 + s2) (Occ.intersectOccurrences (Pos.intersectionWithDispl disp) d1 d2)
intersectFuzzySC lb ub (SCO s1 d1) (SCO s2 d2)
= SCO (s1 + s2) (Occ.intersectOccurrences (Pos.intersectionWithIntervall lb ub) d1 d2)
newtype ScoredRawDocs
= SRD [([Word], ScoredOccs)]
deriving (Show)
instance Monoid ScoredRawDocs where
mempty
= SRD []
mappend (SRD xs1) (SRD xs2)
= SRD $ xs1 ++ xs2
instance ScoredResult ScoredRawDocs where
boost b (SRD xs)
= SRD $ L.map (second (boost b)) xs
nullSC (SRD xs)
= L.null xs
differenceSC (SRD xs1) (SRD xs2)
= srd $ [(ws1, SCO sc1 (diff occ1 xs2)) | (ws1, SCO sc1 occ1) <- xs1]
where
diff occ xs
= L.foldl op occ xs
where
op occ' (_ws2, SCO _sc2 occ2)
= DM.difference occ' occ2
intersectSC
= intersectSRD intersectSC
intersectDisplSC d
= intersectSRD $ intersectDisplSC d
intersectFuzzySC lb ub
= intersectSRD $ intersectFuzzySC lb ub
intersectSRD :: (ScoredOccs -> ScoredOccs -> ScoredOccs)
-> ScoredRawDocs -> ScoredRawDocs -> ScoredRawDocs
intersectSRD interSRD (SRD xs1) (SRD xs2)
= srd $ [ (ws1 ++ ws2, interSRD socc1 socc2)
| (ws1, socc1) <- xs1
, (ws2, socc2) <- xs2
]
srd :: [([Word], ScoredOccs)] -> ScoredRawDocs
srd
= SRD . L.filter (not . nullSC . snd)
filterByDocSet :: UnScoredDocs -> ScoredRawDocs -> ScoredRawDocs
filterByDocSet (UDS ds) (SRD xs)
= SRD $ concatMap filterDocs xs
where
filterDocs (ws, SCO sc occ)
| Occ.null occ'
= []
| otherwise
= [(ws, SCO sc occ')]
where
occ' = DM.filterWithKey p occ
where
p k _v = k `DS.member` ds
newtype ScoredCx a
= SCX (Map Context a)
deriving (Show)
instance Functor ScoredCx where
fmap f (SCX m) = SCX $ M.map f m
instance Monoid a => Monoid (ScoredCx a) where
mempty
= SCX M.empty
mappend (SCX m1) (SCX m2)
= SCX $ M.unionWith (<>) m1 m2
instance ScoredResult a => ScoredResult (ScoredCx a) where
boost b (SCX m)
= SCX $ M.map (boost b) m
nullSC (SCX m)
= M.null m
differenceSC
= binopSCX differenceSC
intersectSC
= binopSCX intersectSC
intersectDisplSC d
= binopSCX $ intersectDisplSC d
intersectFuzzySC lb ub
= binopSCX $ intersectFuzzySC lb ub
boostAndAggregateCx :: ScoredResult a => ScoredContexts -> ScoredCx a -> a
boostAndAggregateCx (SWS sm) (SCX m)
= M.foldlWithKey op mempty m
where
op res k x
= boost b x <> res
where
b = fromMaybe defScore $ M.lookup k sm
contextWeights :: Schema -> ScoredContexts
contextWeights s
= SWS $ M.map cxWeight s
binopSCX :: ScoredResult a => (a -> a -> a) -> (ScoredCx a -> ScoredCx a -> ScoredCx a)
binopSCX op (SCX m1) (SCX m2)
= scx $ M.mapWithKey op' m1
where
op' cx1 sr1
= sr1 `op` sr2
where
sr2 = fromMaybe mempty $ M.lookup cx1 m2
scx :: ScoredResult a => Map Word a -> ScoredCx a
scx = SCX . M.filter (not . nullSC)
type CxRawResults = [(Context, RawScoredResult)]
fromCxRawResults :: CxRawResults -> ScoredCx ScoredRawDocs
fromCxRawResults crs
=
res
where
res = mconcat $ L.map (uncurry $ fromRawResult) crs
fromRawResult :: Context -> RawScoredResult -> ScoredCx ScoredRawDocs
fromRawResult cx rr
= SCX $ M.singleton cx sr
where
sr = SRD $ L.map toScored rr
where
toScored (w, (sc, occ))
= ([w], SCO sc occ)
limitCxRawResults :: Int -> CxRawResults -> CxRawResults
limitCxRawResults mx
= L.map (second $ limitRawResult mx)
limitRawResult :: Int -> RawScoredResult -> RawScoredResult
limitRawResult maxDocs rs
| maxDocs <= 0 = rs
| otherwise = takeDocs maxDocs rs
where
takeDocs _ xs@[]
= xs
takeDocs _ xs@[_]
= xs
takeDocs mx (r@(_w, (_sc, occ)) : xs)
= case DM.sizeWithLimit mx occ of
Nothing -> [r]
Just s -> let mx' = mx s in
if mx' <= 0
then [r]
else r : takeDocs mx' xs
toDocsResult :: (Applicative m, Monad m, DocTable dt) =>
dt -> ScoredDocs -> m [RankedDoc]
toDocsResult dt (SDS m)
= mapM toDoc (DM.toList m)
where
toDoc (did, sc)
= (toD . fromJust') <$> Dt.lookup did dt
where
toD d'
= RD (wght d * sc, d)
where
d = unwrap d'
fromJust' (Just x) = x
fromJust' Nothing = error "Intermediate.toDocsResult: undefined"
newtype RankedDoc = RD {unRD :: (Score, Document)}
deriving Show
instance Eq RankedDoc where
(RD (sc1,d1)) == (RD (sc2,d2))
= sc1 == sc2
&&
uri d1 == uri d2
instance Ord RankedDoc where
(RD (sc1,d1)) `compare` (RD (sc2,d2))
= case sc1 `compare` sc2 of
EQ -> uri d2 `compare` uri d1
r -> r
instance ToJSON RankedDoc where
toJSON (RD (c,d))
= addScore $ toJSON d
where
addScore (Object hm)
= Object $ HM.insert "score" (toJSON c) hm
addScore _
= error "toJSON rankedDoc: propably a bug introduced with #75"
toDocumentResultPage :: Int -> Int -> [RankedDoc] -> [RankedDoc]
toDocumentResultPage = Q.pageList
data RankedWord = RW Score Word
deriving Eq
instance Ord RankedWord where
(RW s1 w1) `compare` (RW s2 w2)
= case s1 `compare` s2 of
EQ -> w2 `compare` w1
r -> r
toWordsResult :: Int -> ScoredWords -> [(Word, Score)]
toWordsResult len (SWS m)
= map (\ (RW s w) -> (w, s))
. Q.toList 0 len
. M.foldWithKey (\ w s -> Q.insert (RW s w)) (Q.mkQueue len)
$ m
class Aggregate a b where
aggregate :: a -> b
instance Aggregate a a where
aggregate = id
instance Aggregate ScoredDocs Score where
aggregate (SDS m)
= DM.foldr (<>) defScore m
instance Aggregate ScoredOccs ScoredDocs where
aggregate (SCO sc occ)
= SDS $ DM.map toScore occ
where
toScore = (sc *) . mkScore . fromIntegral . Pos.size
instance Aggregate ScoredOccs UnScoredDocs where
aggregate (SCO _sc occ)
= toUnScoredDocs occ
instance Aggregate ScoredOccs Score where
aggregate = agg2 . agg1
where
agg1 :: ScoredOccs -> ScoredDocs
agg1 = aggregate
agg2 :: ScoredDocs -> Score
agg2 = aggregate
instance Aggregate ScoredRawDocs ScoredDocs where
aggregate (SRD xs)
= L.foldl (<>) mempty
$ L.map (aggregate . snd) xs
instance Aggregate ScoredRawDocs UnScoredDocs where
aggregate (SRD xs)
= L.foldl (<>) mempty
$ L.map (aggregate . snd) xs
instance Aggregate ScoredRawDocs ScoredWords where
aggregate (SRD xs)
= SWS
$ L.foldl (\ res (w, sc) -> M.insertWith (+) w sc res) M.empty
$ L.map (L.last *** aggregate) xs
instance Aggregate a b => Aggregate (ScoredCx a) (ScoredCx b) where
aggregate (SCX m)
= SCX $ M.map aggregate m
evalSequence :: (ScoredResult r, Aggregate ScoredRawDocs r) =>
(ScoredCx ScoredRawDocs -> ScoredCx r) ->
[ScoredCx ScoredRawDocs] -> ScoredCx r
evalSequence _aggr []
= mempty
evalSequence aggr [r1]
= aggr r1
evalSequence aggr (r1 : rs)
= aggr $ L.foldl op r1 (L.zip [(1::Int)..] rs)
where
op acc (d, r2) = intersectDisplSC d acc r2
evalFollow :: (ScoredResult r, Aggregate ScoredRawDocs r) =>
(ScoredCx ScoredRawDocs -> ScoredCx r) ->
Int ->
[ScoredCx ScoredRawDocs] -> ScoredCx r
evalFollow _aggr _d []
= mempty
evalFollow aggr _d [r1]
= aggr r1
evalFollow aggr dp (r1 : rs)
= aggr $ L.foldl op r1 (L.zip [dp, (2 * dp) ..] rs)
where
op acc (d, r2) = intersectFuzzySC 1 d acc r2
evalNear :: (ScoredResult r, Aggregate ScoredRawDocs r) =>
(ScoredCx ScoredRawDocs -> ScoredCx r) ->
Int ->
[ScoredCx ScoredRawDocs] -> ScoredCx r
evalNear _aggr _d []
= mempty
evalNear aggr _d [r1]
= aggr r1
evalNear aggr dp (r1 : rs)
= aggr $ L.foldl op r1 (L.zip [dp, (2 * dp) ..] rs)
where
op acc (d, r2) = intersectFuzzySC (d) d acc r2
evalOr, evalAnd, evalAndNot :: ScoredResult a => [a] -> a
evalOr = evalBinary (<>)
evalAnd = evalBinary intersectSC
evalAndNot = evalBinary differenceSC
evalBinary :: ScoredResult a => (a -> a -> a) -> [a] -> a
evalBinary _ []
= mempty
evalBinary _ [r]
= r
evalBinary op rs
= L.foldl1 op rs
evalBoost :: ScoredResult a => Score -> a -> a
evalBoost = boost
evalPrim :: Aggregate (ScoredCx ScoredRawDocs) (ScoredCx r) => ScoredCx ScoredRawDocs -> ScoredCx r
evalPrim = aggregate