-- ----------------------------------------------------------------------------

{- |
  Module     : Holumbus.Query.Result
  Copyright  : Copyright (C) 2007 Timo B. Huebel
  License    : MIT

  Maintainer : Timo B. Huebel (tbh@holumbus.org)
  Stability  : experimental
  Portability: portable

  The data type for results of Holumbus queries.

  The result of a query is defined in terms of two partial results, 
  the documents containing the search terms and the words which 
  are possible completions of the serach terms.

-}

-- ----------------------------------------------------------------------------

module Holumbus.Query.Result 
  (
  -- * Result data types
  Result (..)
  , DocHits
  , DocContextHits
  , DocWordHits
  , WordHits
  , WordContextHits
  , WordDocHits
  , DocInfo (..)
  , WordInfo (..)
  , Score
  
  -- * Construction
  , emptyResult

  -- * Query
  , null
  , sizeDocHits
  , sizeWordHits
  , maxScoreDocHits
  , maxScoreWordHits
  , getDocuments

  -- * Transform
  , setDocScore
  , setWordScore
  
  -- * Picklers
  , xpDocHits
  , xpWordHits
  )
where

import Prelude                  hiding (null)

import Control.DeepSeq
import Control.Monad            ( liftM2 )

import Data.Binary              ( Binary (..) )
import Data.Function
import Data.Map (Map)
import qualified
       Data.Map as M
import qualified
       Data.List as L

import Holumbus.Utility
import Holumbus.Index.Common

import Text.XML.HXT.Core

-- ----------------------------------------------------------------------------

-- | The combined result type for Holumbus queries.
data Result a           = Result        
                          { docHits  :: (DocHits a)  -- ^ The documents matching the query.
                          , wordHits :: WordHits     -- ^ The words which are completions of the query terms.
                          }
                          deriving (Eq, Show)

-- | Information about an document.
data DocInfo a          = DocInfo 
                          { document :: (Document a) -- ^ The document itself.
                          , docScore :: Score        -- ^ The score for the document (initial score for all documents is @0.0@).
                          }
                          deriving (Eq, Show)

-- | Information about a word.
data WordInfo           = WordInfo 
                          { terms     :: Terms    -- ^ The search terms that led to this very word.
                          , wordScore :: Score    -- ^ The frequency of the word in the document for a context.
                          }
                          deriving (Eq, Show)

-- | A mapping from a document to it's score and the contexts where it was found.
type DocHits a          = DocIdMap (DocInfo a, DocContextHits)

-- | A mapping from a context to the words of the document that were found in this context.
type DocContextHits     = Map Context DocWordHits

-- | A mapping from a word of the document in a specific context to it's positions.
type DocWordHits        = Map Word Positions

-- | A mapping from a word to it's score and the contexts where it was found.
type WordHits           = Map Word (WordInfo, WordContextHits)

-- | A mapping from a context to the documents that contain the word that were found in this context.
type WordContextHits    = Map Context WordDocHits

-- | A mapping from a document containing the word to the positions of the word.
type WordDocHits        = Occurrences

-- | The score of a hit (either a document hit or a word hit).
type Score              = Float

-- | The original search terms entered by the user.
type Terms              = [String]

-- ----------------------------------------------------------------------------

instance Binary a => Binary (Result a) where
  put (Result dh wh)    = put dh >> put wh
  get                   = liftM2 Result get get

instance Binary a => Binary (DocInfo a) where
  put (DocInfo d s)     = put d >> put s
  get                   = liftM2 DocInfo get get

instance Binary WordInfo where
  put (WordInfo t s)    = put t >> put s
  get                   = liftM2 WordInfo get get

instance NFData a => NFData (Result a) where
  rnf (Result dh wh)    = rnf dh `seq` rnf wh

instance NFData a => NFData (DocInfo a) where
  rnf (DocInfo d s)     = rnf d `seq` rnf s

instance NFData WordInfo where
  rnf (WordInfo t s)    = rnf t `seq` rnf s

instance XmlPickler a => XmlPickler (Result a) where
  xpickle               = xpElem "result" $ 
                          xpWrap ( \ (dh, wh) -> Result dh wh
                                 , \ (Result dh wh) -> (dh, wh)
                                 ) (xpPair xpDocHits xpWordHits)

instance XmlPickler a => XmlPickler (DocInfo a) where
  xpickle               = xpWrap ( \ (d, s) -> DocInfo d s
                                 , \ (DocInfo d s) -> (d, s)
                                 ) xpDocInfo'
    where
    xpDocInfo'          = xpPair xpickle (xpAttr "score" xpPrim)

instance XmlPickler WordInfo where
  xpickle               = xpWrap ( \ (t, s) -> WordInfo t s
                                 , \ (WordInfo t s) -> (t, s)
                                 ) xpWordInfo
    where
    xpWordInfo          = xpPair (xpAttr "term" xpTerms)
                                 (xpAttr "score" xpPrim)
    xpTerms             = xpWrap (split ",", join ",") xpText0

-- ----------------------------------------------------------------------------

-- | The XML pickler for the document hits. Will be sorted by score.
xpDocHits               :: XmlPickler a => PU (DocHits a)
xpDocHits               = xpElem "dochits" $
                          xpWrap ( fromListDocIdMap
                                 , toListSorted
                                 ) (xpList xpDocHit)
  where
  toListSorted          = L.sortBy (compare `on` (docScore . fst . snd)) . toListDocIdMap -- Sort by score
  xpDocHit              = xpElem "doc" $
                          xpPair (xpAttr "idref" xpDocId)
                                 (xpPair xpickle xpDocContextHits)

-- | The XML pickler for the contexts in which the documents were found.
xpDocContextHits        :: PU DocContextHits
xpDocContextHits        = xpWrap (M.fromList, M.toList) $
                          xpList xpDocContextHit
  where
  xpDocContextHit       = xpElem "context" $
                          xpPair (xpAttr "name" xpText) xpDocWordHits

-- | The XML pickler for the words and positions found in a document.
xpDocWordHits           :: PU DocWordHits
xpDocWordHits           = xpWrap (M.fromList, M.toList) (xpList xpDocWordHit)
  where
  xpDocWordHit          = xpElem "word" $
                          xpPair (xpAttr "w" xpText) xpPositions

-- | The XML pickler for the word hits. Will be sorted alphabetically by the words.
xpWordHits              :: PU WordHits
xpWordHits              = xpElem "wordhits" $
                          xpWrap (M.fromList, toListSorted) $
                          xpList xpWordHit
  where
  toListSorted          = L.sortBy (compare `on` fst) . M.toList -- Sort by word
  xpWordHit             = xpElem "word" $
                          xpPair (xpAttr "w" xpText)
                                 (xpPair xpickle xpWordContextHits)

-- | The XML pickler for the contexts in which the words were found.
xpWordContextHits       :: PU WordContextHits
xpWordContextHits       = xpWrap (M.fromList, M.toList) $
                          xpList xpWordContextHit
  where
  xpWordContextHit      = xpElem "context" $
                          xpPair (xpAttr "name" xpText) xpWordDocHits

-- | The XML pickler for the documents and positions where the word occurs (reusing existing pickler).
xpWordDocHits           :: PU WordDocHits
xpWordDocHits           = xpOccurrences

-- ----------------------------------------------------------------------------

-- | Create an empty result.
emptyResult             :: Result a
emptyResult             = Result emptyDocIdMap M.empty

-- | Query the number of documents in a result.
sizeDocHits             :: Result a -> Int
sizeDocHits             = sizeDocIdMap . docHits

-- | Query the number of documents in a result.
sizeWordHits            :: Result a -> Int
sizeWordHits            = M.size . wordHits

-- | Query the maximum score of the documents.
maxScoreDocHits         :: Result a -> Score
maxScoreDocHits         = (foldDocIdMap (\(di, _) r -> max (docScore di) r) 0.0) . docHits

-- | Query the maximum score of the words.
maxScoreWordHits        :: Result a -> Score
maxScoreWordHits        = (M.fold (\(wi, _) r -> max (wordScore wi) r) 0.0) . wordHits

-- | Test if the result contains anything.
null                    :: Result a -> Bool
null                    = nullDocIdMap . docHits

-- | Set the score in a document info.
setDocScore             :: Score -> DocInfo a -> DocInfo a
setDocScore s (DocInfo d _)
                        = DocInfo d s

-- | Set the score in a word info.
setWordScore            :: Score -> WordInfo -> WordInfo
setWordScore s (WordInfo t _)
                        = WordInfo t s

-- | Extract all documents from a result
getDocuments            :: Result a -> [Document a]
getDocuments r          = map (document . fst . snd) $
                          toListDocIdMap (docHits r)

-- ----------------------------------------------------------------------------