{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}

-- ----------------------------------------------------------------------------

{- |
  Module     : Holumbus.Index.CompactDocuments
  Copyright  : Copyright (C) 2007-2010 Sebastian M. Schlatt, Timo B. Huebel
  License    : MIT

  Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
  Stability  : experimental
  Portability: MultiParamTypeClasses FlexibleInstances

  A simple version of Holumbus.Index.Documents.
  This implementation is only for reading a document table in the search part of an application.
  The mapping of URIs to DocIds is only required during index building, not when accessing the index.
  So this 2. mapping is removed in this implementation for saving space
-}

-- ----------------------------------------------------------------------------

module Holumbus.Index.CompactSmallDocuments 
(
  -- * Documents type
  SmallDocuments (..)

  -- * Construction
  , emptyDocuments
  , singleton

  -- * Conversion
  , docTable2smallDocTable
)
where

import           Control.DeepSeq

import           Data.Binary                            ( Binary )
import qualified Data.Binary                            as B

import           Holumbus.Index.Common
import qualified Holumbus.Index.CompactDocuments        as CD

import           Text.XML.HXT.Core

-- ----------------------------------------------------------------------------

-- | The table to store the document descriptions
--
-- This table does not contain the reverse map from URIs do DocIds,
-- this reverse map is only needed when crawling, not for searching the index.
-- As a consequence, most of the indes operations are not implemented
--
-- see also 'Holumbus.Index.CompactDocuments.Documents' data type

newtype SmallDocuments a        = SmallDocuments
                                  { idToSmallDoc   :: CD.DocMap a -- ^ A mapping from a doc id
                                                                  --   to the document itself.
                                  }

-- ----------------------------------------------------------------------------

instance (Binary a, HolIndex i) => HolDocIndex SmallDocuments a i where
    defragmentDocIndex          = notImpl
{-
    defragmentDocIndex dt ix    = (dt1, ix1)
        where
          dt1                   = editDocIds editId dt
          ix1                   = updateDocIds' editId ix
          editId i              = fromJust . lookupDocIdMap i $ idMap
          idMap                 = fromListDocIdMap . flip zip (map mkDocId [1..]) . keysDocIdMap . toMap $ dt
-}

    unionDocIndex dt1 ix1 dt2 ix2
                                = (dt, ix)
        where
          dt                    = unionDocs     dt1  dt2
          ix                    = mergeIndexes  ix1  ix2

-- ----------------------------------------------------------------------------

instance Binary a => HolDocuments SmallDocuments a where
  sizeDocs                      = sizeDocIdMap . idToSmallDoc
  
  lookupById  d i               = maybe (fail "") return
                                  . fmap CD.toDocument
                                  . lookupDocIdMap i
                                  . idToSmallDoc
                                  $ d

  makeEmpty _                   = emptyDocuments

  -- this is a sufficient test, but if the doc ids don't form an intervall
  -- it may be too strict

  disjointDocs dt1 dt2
      | nullDocs dt1
        ||
        nullDocs dt2            = True
      | otherwise               = disjoint ( minKeyDocIdMap . idToSmallDoc $ dt1
                                           , maxKeyDocIdMap . idToSmallDoc $ dt1
                                           )
                                           ( minKeyDocIdMap . idToSmallDoc $ dt2
                                           , maxKeyDocIdMap . idToSmallDoc $ dt2
                                           )
      where
        disjoint p1@(x1, y1) p2@(x2, _y2)
            | x1 <= x2          = y1 < x2
            | otherwise         = disjoint p2 p1

  unionDocs dt1 dt2
      | disjointDocs dt1 dt2    = SmallDocuments
                                  { idToSmallDoc = unionDocIdMap (idToSmallDoc dt1) (idToSmallDoc dt2)
                                  }
      | otherwise               = error $
                                  "unionDocs: doctables are not disjoint: " ++
                                  show (didIntervall dt1) ++ ", " ++ show (didIntervall dt2)
      where
       didIntervall dt          = ( minKeyDocIdMap . idToSmallDoc $ dt
                                  , maxKeyDocIdMap . idToSmallDoc $ dt
                                  )

  editDocIds f d                = SmallDocuments
                                  { idToSmallDoc = newIdToDoc
                                  }
      where
        newIdToDoc              = foldWithKeyDocIdMap (insertDocIdMap . f) emptyDocIdMap
                                  $ idToSmallDoc d

  fromMap                       = SmallDocuments . CD.toDocMap
  toMap                         = CD.fromDocMap . idToSmallDoc

  -- only lookup by doc id, union and defragment ops are implemented
  -- the others are not needed when merging or searching the doc indexes

  lookupByURI                   = notImpl

  insertDoc                     = notImpl
  updateDoc                     = notImpl
  removeById                    = notImpl
  updateDocuments               = notImpl
  filterDocuments               = notImpl

-- ----------------------------------------------------------------------------

instance NFData a =>            NFData (SmallDocuments a)
    where
    rnf (SmallDocuments i2d)    = rnf i2d

-- ----------------------------------------------------------------------------

instance (Binary a, XmlPickler a) =>
                                XmlPickler (SmallDocuments a)
    where
    xpickle                     = xpElem "documents" $
                                  xpWrap convertDoctable $
                                  xpWrap (fromListDocIdMap, toListDocIdMap) $
                                  xpList xpDocumentWithId
        where
        convertDoctable         = ( SmallDocuments
                                  , idToSmallDoc
                                  )
        xpDocumentWithId        = xpElem "doc" $
                                  xpPair (xpAttr "id" xpDocId) xpickle

-- ----------------------------------------------------------------------------

instance Binary a =>            Binary (SmallDocuments a)
    where
    put (SmallDocuments i2d)    = B.put i2d
    get                         = do
                                  i2d <- B.get
                                  return $ SmallDocuments i2d

-- ------------------------------------------------------------

notImpl                         :: a
notImpl                         = error "operation not implemented for SmallDocuments data type"

-- ------------------------------------------------------------

-- | Create an empty table.

emptyDocuments                  :: SmallDocuments a
emptyDocuments                  = SmallDocuments emptyDocIdMap

-- | Create a document table containing a single document.

singleton                       :: (Binary a) => Document a -> SmallDocuments a
singleton d                     = SmallDocuments (singletonDocIdMap firstDocId (CD.fromDocument d))

-- | Convert a Compact document table into a small compact document table.
-- Called at the end of building an index

docTable2smallDocTable          :: CD.Documents a -> SmallDocuments a
docTable2smallDocTable          =  SmallDocuments . CD.idToDoc

-- ------------------------------------------------------------