{-# OPTIONS -fno-warn-orphans #-}

-- ----------------------------------------------------------------------------

{- |
  Module     : Holumbus.Index.Common.Occurences
  Copyright  : Copyright (C) 2011 Sebastian M. Schlatt, Timo B. Huebel, Uwe Schmidt
  License    : MIT

  Maintainer : Timo B. Huebel (tbh@holumbus.org)
  Stability  : experimental
  Portability: none portable

  The Occurences data type

-}

-- ----------------------------------------------------------------------------

module Holumbus.Index.Common.Occurences
where

import Control.DeepSeq

import Data.Binary              ( Binary (..) )
import qualified
       Data.Binary              as B

import qualified Data.EnumMap   as IM
import qualified Data.EnumSet   as IS

import Holumbus.Index.Common.BasicTypes
import Holumbus.Index.Common.DocId
import Holumbus.Index.Common.DocIdMap

import Text.XML.HXT.Core

-- ------------------------------------------------------------

-- | The occurrences in a number of documents.
-- A mapping from document ids to the positions in the document.

type Occurrences        = DocIdMap Positions

-- | Create an empty set of positions.
emptyOccurrences        :: Occurrences
emptyOccurrences        = IM.empty

-- | Create an empty set of positions.
singletonOccurrence     :: DocId -> Position -> Occurrences
singletonOccurrence d p = insertOccurrence d p IM.empty

-- | Test on empty set of positions.
nullOccurrences         :: Occurrences -> Bool
nullOccurrences         = IM.null

-- | Determine the number of positions in a set of occurrences.
sizeOccurrences         :: Occurrences -> Int
sizeOccurrences         = IM.fold ((+) . IS.size) 0

insertOccurrence        :: DocId -> Position -> Occurrences -> Occurrences
insertOccurrence d p    = IM.insertWith IS.union d (singletonPos p)

deleteOccurrence        :: DocId -> Position -> Occurrences -> Occurrences
deleteOccurrence d p    = substractOccurrences (IM.singleton d (singletonPos p))

updateOccurrences       :: (DocId -> DocId) -> Occurrences -> Occurrences
updateOccurrences f     = IM.foldWithKey (\ d ps res -> IM.insertWith IS.union (f d) ps res) emptyOccurrences

-- | Merge two occurrences.
mergeOccurrences        :: Occurrences -> Occurrences -> Occurrences
mergeOccurrences        = IM.unionWith IS.union

diffOccurrences         :: Occurrences -> Occurrences -> Occurrences
diffOccurrences          = IM.difference

-- | Substract occurrences from some other occurrences.
substractOccurrences    :: Occurrences -> Occurrences -> Occurrences
substractOccurrences    = IM.differenceWith substractPositions
  where
  substractPositions p1 p2
                        = if IS.null diffPos
                          then Nothing
                          else Just diffPos
      where
      diffPos                   = IS.difference p1 p2

-- | The XML pickler for the occurrences of a word.

xpOccurrences           :: PU Occurrences
xpOccurrences           = xpWrap (IM.fromList, IM.toList)
                                 (xpList xpOccurrence)
  where
  xpOccurrence          = xpElem "doc" $
                          xpPair (xpAttr "idref" xpDocId)
                                 xpPositions

-- ------------------------------------------------------------

-- | The positions of the word in the document.
type Positions                  = IS.EnumSet Position

emptyPos                :: Positions
emptyPos                = IS.empty

singletonPos            :: Position -> Positions
singletonPos            = IS.singleton

memberPos               :: Position -> Positions -> Bool
memberPos               = IS.member

toAscListPos            :: Positions -> [Position]
toAscListPos            = IS.toAscList

fromListPos             :: [Position] -> Positions
fromListPos             = IS.fromList

sizePos                 :: Positions -> Int
sizePos                 = IS.size

unionPos                :: Positions -> Positions -> Positions
unionPos                = IS.union

foldPos                 :: (Position -> r -> r) -> r -> Positions -> r
foldPos                 = IS.fold

-- | The XML pickler for a set of positions.
xpPositions             :: PU Positions
xpPositions             = xpWrap ( IS.fromList . (map read) . words
                                 , unwords . (map show) . IS.toList
                                 ) xpText

instance (NFData v, Enum v) => NFData (IS.EnumSet v) where
    rnf                   = rnf . IS.toList

instance (Binary v, Enum v) => Binary (IS.EnumSet v) where
    put                   = B.put . IS.toList
    get                   = B.get >>= return . IS.fromList

-- ------------------------------------------------------------