-- ----------------------------------------------------------------------------

{- |
  Module     : Holumbus.Query.Intermediate
  Copyright  : Copyright (C) 2007, 2008 Timo B. Huebel
  License    : MIT

  Maintainer : Timo B. Huebel (tbh@holumbus.org)
  Stability  : experimental
  Portability: portable
  Version    : 0.3

  The data type for intermediate results occuring during query processing.

-}

-- ----------------------------------------------------------------------------

{-# OPTIONS #-}

module Holumbus.Query.Intermediate 
(
  -- * The intermediate result type.
  Intermediate 

  -- * Construction
  , emptyIntermediate

  -- * Query
  , null
  , sizeIntermediate

  -- * Combine
  , union
  , difference
  , intersection  
  , unions
  
  -- * Conversion
  , fromList
  , toResult
)
where

import Prelude                  hiding (null)

import Data.Maybe

import qualified Data.List      as L

import Data.Map (Map)
import qualified Data.Map       as M

import Holumbus.Query.Result    hiding (null)

import Holumbus.Index.Common    hiding (toList, fromList)

-- ----------------------------------------------------------------------------

-- | The intermediate result used during query processing.

type Intermediate               = DocIdMap IntermediateContexts
type IntermediateContexts       = Map Context IntermediateWords
type IntermediateWords          = Map Word (WordInfo, Positions)

-- ----------------------------------------------------------------------------

-- | Create an empty intermediate result.

emptyIntermediate               :: Intermediate
emptyIntermediate               = emptyDocIdMap

-- | Check if the intermediate result is empty.

null                            :: Intermediate -> Bool
null                            = nullDocIdMap

-- | Returns the number of documents in the intermediate result.

sizeIntermediate                :: Intermediate -> Int
sizeIntermediate                = sizeDocIdMap

-- | Merges a bunch of intermediate results into one intermediate result by unioning them.

unions                          :: [Intermediate] -> Intermediate
unions                          = L.foldl' union emptyIntermediate

-- | Intersect two sets of intermediate results.

intersection                    :: Intermediate -> Intermediate -> Intermediate
intersection                    = intersectionWithDocIdMap combineContexts

-- | Union two sets of intermediate results.

union                           :: Intermediate -> Intermediate -> Intermediate
union                           = unionWithDocIdMap combineContexts

-- | Substract two sets of intermediate results.

difference                      :: Intermediate -> Intermediate -> Intermediate
difference                      = differenceDocIdMap

-- | Create an intermediate result from a list of words and their occurrences.

fromList :: Word -> Context -> RawResult -> Intermediate
-- Beware! This is extremly optimized and will not work for merging arbitrary intermediate results!
-- Based on resultByDocument from Holumbus.Index.Common

fromList t c os                 = mapDocIdMap transform $
                                  unionsWithDocIdMap (flip $ (:) . head)
                                                     (map insertWords os)
  where
  insertWords (w, o)            = mapDocIdMap (\p -> [(w, (WordInfo [t] 0.0 , p))]) o   
  transform w                   = M.singleton c (M.fromList w)

-- | Convert to a @Result@ by generating the 'WordHits' structure.

toResult                        :: HolDocuments d c => d c -> Intermediate -> Result c
toResult d im                   = Result (createDocHits d im) (createWordHits im)

-- | Create the doc hits structure from an intermediate result.

createDocHits                   :: HolDocuments d c => d c -> Intermediate -> DocHits c
createDocHits d im              = mapWithKeyDocIdMap transformDocs im
  where
  transformDocs did ic          = let doc = fromMaybe (Document "" "" Nothing) (lookupById d did) in
                                  (DocInfo doc 0.0, M.map (M.map (\(_, p) -> p)) ic)

-- | Create the word hits structure from an intermediate result.

createWordHits :: Intermediate -> WordHits
createWordHits im               = foldWithKeyDocIdMap transformDoc M.empty im
  where
  transformDoc d ic wh          = M.foldrWithKey transformContext wh ic
    where
    transformContext c iw wh'   = M.foldrWithKey insertWord wh' iw
      where
      insertWord w (wi, pos) wh''
                                = if terms wi == [""]
                                  then wh'' 
                                  else M.insertWith combineWordHits
                                           w
                                           (wi, M.singleton c (singletonDocIdMap d pos))
                                           wh''

-- | Combine two tuples with score and context hits.

combineWordHits                 :: (WordInfo, WordContextHits) ->
                                   (WordInfo, WordContextHits) -> (WordInfo, WordContextHits)
combineWordHits (i1, c1) (i2, c2)
                                = ( combineWordInfo i1 i2
                                  , M.unionWith (unionWithDocIdMap unionPos) c1 c2
                                  )

-- | Combine two tuples with score and context hits.

combineContexts                 :: IntermediateContexts -> IntermediateContexts -> IntermediateContexts
combineContexts                 = M.unionWith (M.unionWith merge)
  where
  merge (i1, p1) (i2, p2)       = ( combineWordInfo i1 i2
                                  , unionPos p1 p2
                                  )

-- | Combine two word informations.

combineWordInfo         :: WordInfo -> WordInfo -> WordInfo
combineWordInfo (WordInfo t1 s1) (WordInfo t2 s2)
                        = WordInfo (t1 ++ t2) (combineScore s1 s2)

-- | Combine two scores (just average between them).

combineScore            :: Score -> Score -> Score
combineScore s1 s2      = (s1 + s2) / 2.0

-- ----------------------------------------------------------------------------