module Holumbus.Query.Result
(
Result (..)
, DocHits
, DocContextHits
, DocWordHits
, WordHits
, WordContextHits
, WordDocHits
, DocInfo (..)
, WordInfo (..)
, Score
, emptyResult
, null
, sizeDocHits
, sizeWordHits
, maxScoreDocHits
, maxScoreWordHits
, getDocuments
, setDocScore
, setWordScore
, xpDocHits
, xpWordHits
)
where
import Prelude hiding (null)
import Control.DeepSeq
import Control.Monad ( liftM2 )
import Data.Binary ( Binary (..) )
import Data.Function
import Data.Map (Map)
import qualified
Data.Map as M
import qualified
Data.List as L
import Holumbus.Utility
import Holumbus.Index.Common
import Text.XML.HXT.Core
data Result a = Result
{ docHits :: (DocHits a)
, wordHits :: WordHits
}
deriving (Eq, Show)
data DocInfo a = DocInfo
{ document :: (Document a)
, docScore :: Score
}
deriving (Eq, Show)
data WordInfo = WordInfo
{ terms :: Terms
, wordScore :: Score
}
deriving (Eq, Show)
type DocHits a = DocIdMap (DocInfo a, DocContextHits)
type DocContextHits = Map Context DocWordHits
type DocWordHits = Map Word Positions
type WordHits = Map Word (WordInfo, WordContextHits)
type WordContextHits = Map Context WordDocHits
type WordDocHits = Occurrences
type Score = Float
type Terms = [String]
instance Binary a => Binary (Result a) where
put (Result dh wh) = put dh >> put wh
get = liftM2 Result get get
instance Binary a => Binary (DocInfo a) where
put (DocInfo d s) = put d >> put s
get = liftM2 DocInfo get get
instance Binary WordInfo where
put (WordInfo t s) = put t >> put s
get = liftM2 WordInfo get get
instance NFData a => NFData (Result a) where
rnf (Result dh wh) = rnf dh `seq` rnf wh
instance NFData a => NFData (DocInfo a) where
rnf (DocInfo d s) = rnf d `seq` rnf s
instance NFData WordInfo where
rnf (WordInfo t s) = rnf t `seq` rnf s
instance XmlPickler a => XmlPickler (Result a) where
xpickle = xpElem "result" $
xpWrap ( \ (dh, wh) -> Result dh wh
, \ (Result dh wh) -> (dh, wh)
) (xpPair xpDocHits xpWordHits)
instance XmlPickler a => XmlPickler (DocInfo a) where
xpickle = xpWrap ( \ (d, s) -> DocInfo d s
, \ (DocInfo d s) -> (d, s)
) xpDocInfo'
where
xpDocInfo' = xpPair xpickle (xpAttr "score" xpPrim)
instance XmlPickler WordInfo where
xpickle = xpWrap ( \ (t, s) -> WordInfo t s
, \ (WordInfo t s) -> (t, s)
) xpWordInfo
where
xpWordInfo = xpPair (xpAttr "term" xpTerms)
(xpAttr "score" xpPrim)
xpTerms = xpWrap (split ",", join ",") xpText0
xpDocHits :: XmlPickler a => PU (DocHits a)
xpDocHits = xpElem "dochits" $
xpWrap ( fromListDocIdMap
, toListSorted
) (xpList xpDocHit)
where
toListSorted = L.sortBy (compare `on` (docScore . fst . snd)) . toListDocIdMap
xpDocHit = xpElem "doc" $
xpPair (xpAttr "idref" xpDocId)
(xpPair xpickle xpDocContextHits)
xpDocContextHits :: PU DocContextHits
xpDocContextHits = xpWrap (M.fromList, M.toList) $
xpList xpDocContextHit
where
xpDocContextHit = xpElem "context" $
xpPair (xpAttr "name" xpText) xpDocWordHits
xpDocWordHits :: PU DocWordHits
xpDocWordHits = xpWrap (M.fromList, M.toList) (xpList xpDocWordHit)
where
xpDocWordHit = xpElem "word" $
xpPair (xpAttr "w" xpText) xpPositions
xpWordHits :: PU WordHits
xpWordHits = xpElem "wordhits" $
xpWrap (M.fromList, toListSorted) $
xpList xpWordHit
where
toListSorted = L.sortBy (compare `on` fst) . M.toList
xpWordHit = xpElem "word" $
xpPair (xpAttr "w" xpText)
(xpPair xpickle xpWordContextHits)
xpWordContextHits :: PU WordContextHits
xpWordContextHits = xpWrap (M.fromList, M.toList) $
xpList xpWordContextHit
where
xpWordContextHit = xpElem "context" $
xpPair (xpAttr "name" xpText) xpWordDocHits
xpWordDocHits :: PU WordDocHits
xpWordDocHits = xpOccurrences
emptyResult :: Result a
emptyResult = Result emptyDocIdMap M.empty
sizeDocHits :: Result a -> Int
sizeDocHits = sizeDocIdMap . docHits
sizeWordHits :: Result a -> Int
sizeWordHits = M.size . wordHits
maxScoreDocHits :: Result a -> Score
maxScoreDocHits = (foldDocIdMap (\(di, _) r -> max (docScore di) r) 0.0) . docHits
maxScoreWordHits :: Result a -> Score
maxScoreWordHits = (M.fold (\(wi, _) r -> max (wordScore wi) r) 0.0) . wordHits
null :: Result a -> Bool
null = nullDocIdMap . docHits
setDocScore :: Score -> DocInfo a -> DocInfo a
setDocScore s (DocInfo d _)
= DocInfo d s
setWordScore :: Score -> WordInfo -> WordInfo
setWordScore s (WordInfo t _)
= WordInfo t s
getDocuments :: Result a -> [Document a]
getDocuments r = map (document . fst . snd) $
toListDocIdMap (docHits r)