module Hunt.ContextIndex
(
empty
, insertContext
, deleteContext
, foreachContext
, contexts
, contextsM
, hasContext
, hasContextM
, lookupRangeCx
, lookupAllWithCx
, searchWithCx
, searchWithCxsNormalized
, searchWithCxSc
, lookupRangeCxSc
, insertList
, createDocTableFromPartition
, unionDocTables
, modifyWithDescription
, delete
, deleteDocsByURI
, decodeCxIx
, member
, ContextIndex (..)
, ContextMap (..)
, IndexRep
, mkContextMap
, mapToSchema
)
where
import Prelude
import qualified Prelude as P
import Control.Applicative (Applicative, (<$>), (<*>))
import Control.Arrow
import Control.Monad
import qualified Control.Monad.Parallel as Par
import Data.Binary (Binary (..))
import Data.Binary.Get
import Data.ByteString.Lazy (ByteString)
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as S
import Data.Text (Text)
import Hunt.Common
import qualified Hunt.Common.DocDesc as DocDesc
import qualified Hunt.Common.DocIdSet as DS
import qualified Hunt.Common.Document as Doc
import Hunt.Common.IntermediateValue
import qualified Hunt.Common.Occurrences as Occ
import Hunt.DocTable (DocTable)
import qualified Hunt.DocTable as Dt
import qualified Hunt.Index as Ix
import Hunt.Index.IndexImpl (IndexImpl)
import qualified Hunt.Index.IndexImpl as Impl
import Hunt.Utility
data ContextIndex dt = ContextIndex
{ ciIndex :: !ContextMap
, ciDocs :: !dt
}
empty :: DocTable dt => ContextIndex dt
empty = ContextIndex emptyContextMap Dt.empty
type IndexRep = (ContextSchema, Impl.IndexImpl)
newtype ContextMap
= ContextMap { cxMap :: Map Context IndexRep }
deriving (Show)
emptyContextMap :: ContextMap
emptyContextMap = mkContextMap $ M.empty
mkContextMap :: Map Context IndexRep -> ContextMap
mkContextMap x = ContextMap $! x
mapToSchema :: ContextMap -> Schema
mapToSchema (ContextMap m) = M.map fst m
getContextMap :: [IndexImpl] -> Get ContextMap
getContextMap ts
= do
impls <- Impl.gets' ts
s <- get
return . mkContextMap . M.fromDistinctAscList $ map (mergeGets s) impls
where
mergeGets s (c, i) = (c, (getS c s, i))
getS c s = fromMaybe (error "deserializating failed: context schema is missing")
$ lookup c s
instance Binary ContextMap where
put = put . cxMap
get = get >>= return . mkContextMap
decodeCxIx :: (Binary dt, DocTable dt) => [IndexImpl] -> ByteString -> ContextIndex dt
decodeCxIx ts = runGet (get' ts)
get' :: Binary dt => [IndexImpl] -> Get (ContextIndex dt)
get' ts = ContextIndex <$> (getContextMap ts) <*> get
instance Binary dt => Binary (ContextIndex dt) where
get = error "existential types cannot be deserialized this way. Use special get' functions"
put (ContextIndex (ContextMap a) b)
= put (M.map snd a) >>
put (M.map fst a) >>
put b
insertList :: (Par.MonadParallel m, Applicative m, DocTable dt) =>
[(Dt.DValue dt, Words)] ->
ContextIndex dt -> m (ContextIndex dt)
insertList docAndWords (ContextIndex ix docTable)
= do
tablesAndWords <- Par.mapM createDocTableFromPartition
$ partitionListByLength 20 docAndWords
(newDt, docIdsAndWords) <- unionDocTables tablesAndWords docTable
newIx <- batchAddWordsM docIdsAndWords ix
return $! ContextIndex newIx newDt
createDocTableFromPartition :: (Par.MonadParallel m, DocTable dt) =>
[(Dt.DValue dt, Words)] -> m (dt, [(DocId, Words)])
createDocTableFromPartition ds
= foldM toDocTable (Dt.empty, []) ds
where
toDocTable (dt, resIdsAndWords) (doc, ws)
= do (dId, dt') <- Dt.insert doc dt
return (dt', (dId, ws):resIdsAndWords)
unionDocTables :: (DocTable dt, Par.MonadParallel m) =>
[(dt, [(DocId, Words)])] -> dt -> m (dt, [(DocId, Words)])
unionDocTables tablesAndWords oldDt
= do step <- Par.mapM unionDtsAndWords $ mkPairs tablesAndWords
case step of
[] -> return (Dt.empty, [])
[(d,w)] -> do n <- Dt.union oldDt d
return (n, w)
xs -> unionDocTables xs oldDt
where
unionDtsAndWords ((dt1, ws1), (dt2, ws2))
= do dt <- Dt.union dt1 dt2
return (dt, ws1 ++ ws2)
mkPairs [] = []
mkPairs (a:[]) = [(a,(Dt.empty,[]))]
mkPairs (a:b:xs) = (a,b):mkPairs xs
deleteDocsByURI :: (Par.MonadParallel m, Applicative m, DocTable dt)
=> Set URI -> ContextIndex dt -> m (ContextIndex dt)
deleteDocsByURI us ixx@(ContextIndex _ix dt) = do
docIds <- liftM (DS.fromList . catMaybes) . mapM (flip Dt.lookupByURI dt) . S.toList $ us
delete docIds ixx
delete :: (Par.MonadParallel m, Applicative m, DocTable dt)
=> DocIdSet -> ContextIndex dt -> m (ContextIndex dt)
delete dIds cix@(ContextIndex ix dt)
| DS.null dIds
= return cix
| otherwise
= do newIx <- delete' dIds ix
newDt <- Dt.difference dIds dt
return $ ContextIndex newIx newDt
member :: (Monad m, Applicative m, DocTable dt)
=> URI -> ContextIndex dt -> m Bool
member u (ContextIndex _ii dt) = do
mem <- Dt.lookupByURI u dt
return $ isJust mem
modifyWithDescription :: (Par.MonadParallel m, Applicative m, DocTable dt) =>
Score -> Description -> Words -> DocId -> ContextIndex dt ->
m (ContextIndex dt)
modifyWithDescription weight descr wrds dId (ContextIndex ii dt)
= do newDocTable <- Dt.adjust mergeDescr dId dt
newIndex <- batchAddWordsM [(dId,wrds)] ii
return $ ContextIndex newIndex newDocTable
where
mergeDescr
= return . Doc.update (updateWeight . updateDescr)
where
updateWeight d
| weight == noScore = d
| otherwise = d {wght = weight}
updateDescr d =
d {desc = DocDesc.deleteNull $
flip DocDesc.union d' descr'
}
where
d' =
desc d
descr' =
descr
batchAddWordsM :: (Functor m, Par.MonadParallel m) =>
[(DocId, Words)] -> ContextMap -> m ContextMap
batchAddWordsM [] ix
= return ix
batchAddWordsM vs (ContextMap m)
= mkContextMap <$>
mapWithKeyMP ( \cx (s, impl) -> do i <- foldinsertList cx impl
return (s, i)
) m
where
foldinsertList :: (Functor m, Monad m) =>
Context -> IndexImpl -> m IndexImpl
foldinsertList cx (Impl.IndexImpl impl)
= Impl.mkIndex <$>
Ix.insertListM (contentForCx cx vs) impl
contentForCx :: Context -> [(DocId, Words)] -> [(Word, IntermediateValue)]
contentForCx cx vs
= concatMap (invert . second (getWlForCx cx)) $ vs
where
invert (did, wl)
= map (second (toIntermediate . Occ.singleton' did)) $ M.toList wl
getWlForCx cx' ws'
= fromMaybe M.empty (M.lookup cx' ws')
mapWithKeyMP :: (Par.MonadParallel m, Ord k) => (k -> a -> m b) -> M.Map k a -> m (M.Map k b)
mapWithKeyMP f m =
(Par.mapM (\(k, a) -> do
b <- f k a
return (k, b)
) $ M.toList m) >>=
return . M.fromList
insertContext :: Context -> Impl.IndexImpl -> ContextSchema
-> ContextIndex dt -> ContextIndex dt
insertContext c ix schema (ContextIndex m dt)
= ContextIndex m' dt
where
m' = insertContext' c schema ix m
insertContext' :: Context -> ContextSchema -> Impl.IndexImpl -> ContextMap -> ContextMap
insertContext' c s ix (ContextMap m) = mkContextMap $ M.insertWith (const id) c (s, ix) m
deleteContext :: Context -> ContextIndex dt -> ContextIndex dt
deleteContext c (ContextIndex ix dt) = ContextIndex (deleteContext' c ix) dt
deleteContext' :: Context -> ContextMap -> ContextMap
deleteContext' cx (ContextMap m) = mkContextMap $ M.delete cx m
delete' :: Par.MonadParallel m => DocIdSet -> ContextMap -> m ContextMap
delete' dIds (ContextMap m)
= mapWithKeyMP (\_ impl -> adjust' impl) m >>= return . mkContextMap
where
adjust' (s, Impl.IndexImpl ix) = Ix.deleteDocsM dIds ix >>= \i -> return (s, Impl.mkIndex i)
lookupRangeCx :: Monad m => Context -> Text -> Text -> ContextMap -> m [(Text, IntermediateValue)]
lookupRangeCx c k1 k2 cm
= lookupIndex c cm $ Ix.lookupRangeM k1 k2
lookupAllWithCx :: Monad m => Context -> ContextMap -> m [(Text, IntermediateValue)]
lookupAllWithCx cx cm
= lookupIndex cx cm $ Ix.toListM
searchWithCx :: Monad m => TextSearchOp -> Context -> Text -> ContextMap -> m [(Text, IntermediateValue)]
searchWithCx op cx w cm
= lookupIndex cx cm $ Ix.searchM op w
searchWithCxsNormalized :: (Functor m, Monad m) =>
TextSearchOp -> [(Context, Text)] -> ContextMap ->
m [(Context, [(Text, IntermediateValue)])]
searchWithCxsNormalized op cxws cm
= P.mapM (uncurry search') cxws
where
search' cx w
= (\ x -> (cx, x))
<$> (lookupIndex cx cm $ Ix.searchM op w)
searchWithCxSc :: Monad m =>
TextSearchOp -> Context -> Text -> ContextMap -> m [(Text, (Score, Occurrences))]
searchWithCxSc op cx w cm
= (lookupIndex cx cm $ Ix.searchMSc op w) >>= return . fromScoredIntermediates
lookupRangeCxSc :: Monad m => Context -> Text -> Text -> ContextMap -> m [(Text, (Score, Occurrences))]
lookupRangeCxSc c k1 k2 cm
= (lookupIndex c cm $ Ix.lookupRangeMSc k1 k2) >>= return . fromScoredIntermediates
lookupIndex :: Monad m =>
Context -> ContextMap ->
(forall i . Impl.IndexImplCon i => i -> m [r]) ->
m [r]
lookupIndex cx (ContextMap m) search
= case M.lookup cx m of
Just (_, Impl.IndexImpl cm)
-> search cm
Nothing
-> return []
foreachContext :: (Functor m, Monad m) =>
[Context] ->
(Context -> m res) ->
m [(Context, res)]
foreachContext cxs action
= P.mapM action' cxs
where
action' cx
= (\ r -> (cx, r)) <$> action cx
contextsM :: (Monad m, DocTable dt)
=> ContextIndex dt -> m [Context]
contextsM (ContextIndex ix _) = return $ contexts ix
contexts :: ContextMap -> [Context]
contexts (ContextMap m) = M.keys m
hasContext :: Context -> ContextMap -> Bool
hasContext c (ContextMap m) = M.member c m
hasContextM :: (Monad m, DocTable dt)
=> Context -> ContextIndex dt -> m Bool
hasContextM c (ContextIndex ix _) = return $ hasContext c ix