module Holumbus.Query.Intermediate
(
Intermediate
, emptyIntermediate
, null
, sizeIntermediate
, union
, difference
, intersection
, unions
, fromList
, toResult
)
where
import Prelude hiding (null)
import Data.Maybe
import qualified Data.List as L
import Data.Map (Map)
import qualified Data.Map as M
import Holumbus.Query.Result hiding (null)
import Holumbus.Index.Common hiding (toList, fromList)
type Intermediate = DocIdMap IntermediateContexts
type IntermediateContexts = Map Context IntermediateWords
type IntermediateWords = Map Word (WordInfo, Positions)
emptyIntermediate :: Intermediate
emptyIntermediate = emptyDocIdMap
null :: Intermediate -> Bool
null = nullDocIdMap
sizeIntermediate :: Intermediate -> Int
sizeIntermediate = sizeDocIdMap
unions :: [Intermediate] -> Intermediate
unions = L.foldl' union emptyIntermediate
intersection :: Intermediate -> Intermediate -> Intermediate
intersection = intersectionWithDocIdMap combineContexts
union :: Intermediate -> Intermediate -> Intermediate
union = unionWithDocIdMap combineContexts
difference :: Intermediate -> Intermediate -> Intermediate
difference = differenceDocIdMap
fromList :: Word -> Context -> RawResult -> Intermediate
fromList t c os = mapDocIdMap transform $
unionsWithDocIdMap (flip $ (:) . head)
(map insertWords os)
where
insertWords (w, o) = mapDocIdMap (\p -> [(w, (WordInfo [t] 0.0 , p))]) o
transform w = M.singleton c (M.fromList w)
toResult :: HolDocuments d c => d c -> Intermediate -> Result c
toResult d im = Result (createDocHits d im) (createWordHits im)
createDocHits :: HolDocuments d c => d c -> Intermediate -> DocHits c
createDocHits d im = mapWithKeyDocIdMap transformDocs im
where
transformDocs did ic = let doc = fromMaybe (Document "" "" Nothing) (lookupById d did) in
(DocInfo doc 0.0, M.map (M.map (\(_, p) -> p)) ic)
createWordHits :: Intermediate -> WordHits
createWordHits im = foldWithKeyDocIdMap transformDoc M.empty im
where
transformDoc d ic wh = M.foldrWithKey transformContext wh ic
where
transformContext c iw wh' = M.foldrWithKey insertWord wh' iw
where
insertWord w (wi, pos) wh''
= if terms wi == [""]
then wh''
else M.insertWith combineWordHits
w
(wi, M.singleton c (singletonDocIdMap d pos))
wh''
combineWordHits :: (WordInfo, WordContextHits) ->
(WordInfo, WordContextHits) -> (WordInfo, WordContextHits)
combineWordHits (i1, c1) (i2, c2)
= ( combineWordInfo i1 i2
, M.unionWith (unionWithDocIdMap unionPos) c1 c2
)
combineContexts :: IntermediateContexts -> IntermediateContexts -> IntermediateContexts
combineContexts = M.unionWith (M.unionWith merge)
where
merge (i1, p1) (i2, p2) = ( combineWordInfo i1 i2
, unionPos p1 p2
)
combineWordInfo :: WordInfo -> WordInfo -> WordInfo
combineWordInfo (WordInfo t1 s1) (WordInfo t2 s2)
= WordInfo (t1 ++ t2) (combineScore s1 s2)
combineScore :: Score -> Score -> Score
combineScore s1 s2 = (s1 + s2) / 2.0