{-# LANGUAGE BangPatterns, NamedFieldPuns #-}
module Data.SearchEngine.SearchIndex (
SearchIndex,
Term,
TermId,
DocId,
emptySearchIndex,
insertDoc,
deleteDoc,
docCount,
lookupTerm,
lookupTermsByPrefix,
lookupTermId,
lookupDocId,
lookupDocKey,
lookupDocKeyDocId,
getTerm,
getDocKey,
invariant,
) where
import Data.SearchEngine.DocIdSet (DocIdSet, DocId)
import qualified Data.SearchEngine.DocIdSet as DocIdSet
import Data.SearchEngine.DocTermIds (DocTermIds, TermId, vecIndexIx, vecCreateIx)
import qualified Data.SearchEngine.DocTermIds as DocTermIds
import Data.SearchEngine.DocFeatVals (DocFeatVals)
import qualified Data.SearchEngine.DocFeatVals as DocFeatVals
import Data.Ix (Ix)
import qualified Data.Ix as Ix
import Data.Map (Map)
import qualified Data.Map as Map
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Data.List (foldl')
import Control.Exception (assert)
type Term = Text
data SearchIndex key field feature = SearchIndex {
forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap :: !(Map Term TermInfo),
forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap :: !(IntMap TermIdInfo),
forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap :: !(IntMap (DocInfo key field feature)),
forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap :: !(Map key DocId),
forall key field feature. SearchIndex key field feature -> TermId
nextTermId :: TermId,
forall key field feature. SearchIndex key field feature -> DocId
nextDocId :: DocId
}
deriving Int -> SearchIndex key field feature -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall key field feature.
Show key =>
Int -> SearchIndex key field feature -> ShowS
forall key field feature.
Show key =>
[SearchIndex key field feature] -> ShowS
forall key field feature.
Show key =>
SearchIndex key field feature -> String
showList :: [SearchIndex key field feature] -> ShowS
$cshowList :: forall key field feature.
Show key =>
[SearchIndex key field feature] -> ShowS
show :: SearchIndex key field feature -> String
$cshow :: forall key field feature.
Show key =>
SearchIndex key field feature -> String
showsPrec :: Int -> SearchIndex key field feature -> ShowS
$cshowsPrec :: forall key field feature.
Show key =>
Int -> SearchIndex key field feature -> ShowS
Show
data TermInfo = TermInfo !TermId !DocIdSet
deriving Int -> TermInfo -> ShowS
[TermInfo] -> ShowS
TermInfo -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TermInfo] -> ShowS
$cshowList :: [TermInfo] -> ShowS
show :: TermInfo -> String
$cshow :: TermInfo -> String
showsPrec :: Int -> TermInfo -> ShowS
$cshowsPrec :: Int -> TermInfo -> ShowS
Show
data TermIdInfo = TermIdInfo !Term !DocIdSet
deriving (Int -> TermIdInfo -> ShowS
[TermIdInfo] -> ShowS
TermIdInfo -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TermIdInfo] -> ShowS
$cshowList :: [TermIdInfo] -> ShowS
show :: TermIdInfo -> String
$cshow :: TermIdInfo -> String
showsPrec :: Int -> TermIdInfo -> ShowS
$cshowsPrec :: Int -> TermIdInfo -> ShowS
Show, TermIdInfo -> TermIdInfo -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TermIdInfo -> TermIdInfo -> Bool
$c/= :: TermIdInfo -> TermIdInfo -> Bool
== :: TermIdInfo -> TermIdInfo -> Bool
$c== :: TermIdInfo -> TermIdInfo -> Bool
Eq)
data DocInfo key field feature = DocInfo !key !(DocTermIds field)
!(DocFeatVals feature)
deriving Int -> DocInfo key field feature -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall key field feature.
Show key =>
Int -> DocInfo key field feature -> ShowS
forall key field feature.
Show key =>
[DocInfo key field feature] -> ShowS
forall key field feature.
Show key =>
DocInfo key field feature -> String
showList :: [DocInfo key field feature] -> ShowS
$cshowList :: forall key field feature.
Show key =>
[DocInfo key field feature] -> ShowS
show :: DocInfo key field feature -> String
$cshow :: forall key field feature.
Show key =>
DocInfo key field feature -> String
showsPrec :: Int -> DocInfo key field feature -> ShowS
$cshowsPrec :: forall key field feature.
Show key =>
Int -> DocInfo key field feature -> ShowS
Show
emptySearchIndex :: SearchIndex key field feature
emptySearchIndex :: forall key field feature. SearchIndex key field feature
emptySearchIndex =
forall key field feature.
Map Term TermInfo
-> IntMap TermIdInfo
-> IntMap (DocInfo key field feature)
-> Map key DocId
-> TermId
-> DocId
-> SearchIndex key field feature
SearchIndex
forall k a. Map k a
Map.empty
forall a. IntMap a
IntMap.empty
forall a. IntMap a
IntMap.empty
forall k a. Map k a
Map.empty
forall a. Bounded a => a
minBound
forall a. Bounded a => a
minBound
checkInvariant :: (Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant :: forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant SearchIndex key field feature
si = forall a. (?callStack::CallStack) => Bool -> a -> a
assert (forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> Bool
invariant SearchIndex key field feature
si) SearchIndex key field feature
si
invariant :: (Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> Bool
invariant :: forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> Bool
invariant SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap, IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap, Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap, IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} =
forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum TermId
termId) IntMap TermIdInfo
termIdMap
forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just (Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term DocIdSet
docidset)
| (Term
term, (TermInfo TermId
termId DocIdSet
docidset)) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map Term TermInfo
termMap ]
Bool -> Bool -> Bool
&& forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
Just (TermInfo TermId
termId' DocIdSet
docidset') -> forall a. Enum a => Int -> a
toEnum Int
termId forall a. Eq a => a -> a -> Bool
== TermId
termId'
Bool -> Bool -> Bool
&& DocIdSet
docidset forall a. Eq a => a -> a -> Bool
== DocIdSet
docidset'
Maybe TermInfo
Nothing -> Bool
False
| (Int
termId, (TermIdInfo Term
term DocIdSet
docidset)) <- forall a. IntMap a -> [(Int, a)]
IntMap.assocs IntMap TermIdInfo
termIdMap ]
Bool -> Bool -> Bool
&& forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum DocId
docId) IntMap (DocInfo key field feature)
docIdMap of
Just (DocInfo key
docKey' DocTermIds field
_ DocFeatVals feature
_) -> key
docKey forall a. Eq a => a -> a -> Bool
== key
docKey'
Maybe (DocInfo key field feature)
Nothing -> Bool
False
| (key
docKey, DocId
docId) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map key DocId
docKeyMap ]
Bool -> Bool -> Bool
&& forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
docKey Map key DocId
docKeyMap forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just (forall a. Enum a => Int -> a
toEnum Int
docId)
| (Int
docId, DocInfo key
docKey DocTermIds field
_ DocFeatVals feature
_) <- forall a. IntMap a -> [(Int, a)]
IntMap.assocs IntMap (DocInfo key field feature)
docIdMap ]
Bool -> Bool -> Bool
&& forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ DocIdSet -> Bool
DocIdSet.invariant DocIdSet
docIdSet
| (Term
_term, (TermInfo TermId
_ DocIdSet
docIdSet)) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map Term TermInfo
termMap ]
Bool -> Bool -> Bool
&& forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\field
field -> forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> TermId -> Int
DocTermIds.fieldTermCount DocTermIds field
docterms field
field TermId
termId forall a. Ord a => a -> a -> Bool
> Int
0) [field]
fields
| (Term
_term, (TermInfo TermId
termId DocIdSet
docIdSet)) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map Term TermInfo
termMap
, DocId
docId <- DocIdSet -> [DocId]
DocIdSet.toList DocIdSet
docIdSet
, let DocInfo key
_ DocTermIds field
docterms DocFeatVals feature
_ = IntMap (DocInfo key field feature)
docIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum DocId
docId ]
Bool -> Bool -> Bool
&& forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall a. Int -> IntMap a -> Bool
IntMap.member (forall a. Enum a => a -> Int
fromEnum TermId
termid) IntMap TermIdInfo
termIdMap
| (Int
_docId, DocInfo key
_ DocTermIds field
docTerms DocFeatVals feature
_) <- forall a. IntMap a -> [(Int, a)]
IntMap.assocs IntMap (DocInfo key field feature)
docIdMap
, field
field <- [field]
fields
, TermId
termid <- forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> [TermId]
DocTermIds.fieldElems DocTermIds field
docTerms field
field ]
where
fields :: [field]
fields = forall a. Ix a => (a, a) -> [a]
Ix.range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
docCount :: SearchIndex key field feature -> Int
docCount :: forall key field feature. SearchIndex key field feature -> Int
docCount SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} = forall a. IntMap a -> Int
IntMap.size IntMap (DocInfo key field feature)
docIdMap
lookupTerm :: SearchIndex key field feature -> Term -> Maybe (TermId, DocIdSet)
lookupTerm :: forall key field feature.
SearchIndex key field feature -> Term -> Maybe (TermId, DocIdSet)
lookupTerm SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap} Term
term =
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
Maybe TermInfo
Nothing -> forall a. Maybe a
Nothing
Just (TermInfo TermId
termid DocIdSet
docidset) -> forall a. a -> Maybe a
Just (TermId
termid, DocIdSet
docidset)
lookupTermsByPrefix :: SearchIndex key field feature ->
Term -> [(TermId, DocIdSet)]
lookupTermsByPrefix :: forall key field feature.
SearchIndex key field feature -> Term -> [(TermId, DocIdSet)]
lookupTermsByPrefix SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap} Term
term =
[ (TermId
termid, DocIdSet
docidset)
| (TermInfo TermId
termid DocIdSet
docidset) <- forall v. Term -> Map Term v -> [v]
lookupPrefix Term
term Map Term TermInfo
termMap ]
lookupTermId :: SearchIndex key field feature -> TermId -> DocIdSet
lookupTermId :: forall key field feature.
SearchIndex key field feature -> TermId -> DocIdSet
lookupTermId SearchIndex{IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap} TermId
termid =
case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum TermId
termid) IntMap TermIdInfo
termIdMap of
Maybe TermIdInfo
Nothing -> forall a. (?callStack::CallStack) => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"lookupTermId: not found " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show TermId
termid
Just (TermIdInfo Term
_ DocIdSet
docidset) -> DocIdSet
docidset
lookupDocId :: SearchIndex key field feature ->
DocId -> (key, DocTermIds field, DocFeatVals feature)
lookupDocId :: forall key field feature.
SearchIndex key field feature
-> DocId -> (key, DocTermIds field, DocFeatVals feature)
lookupDocId SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} DocId
docid =
case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum DocId
docid) IntMap (DocInfo key field feature)
docIdMap of
Maybe (DocInfo key field feature)
Nothing -> forall {a}. a
errNotFound
Just (DocInfo key
key DocTermIds field
doctermids DocFeatVals feature
docfeatvals) -> (key
key, DocTermIds field
doctermids, DocFeatVals feature
docfeatvals)
where
errNotFound :: a
errNotFound = forall a. (?callStack::CallStack) => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"lookupDocId: not found " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show DocId
docid
lookupDocKey :: Ord key => SearchIndex key field feature ->
key -> Maybe (DocTermIds field)
lookupDocKey :: forall key field feature.
Ord key =>
SearchIndex key field feature -> key -> Maybe (DocTermIds field)
lookupDocKey SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap, IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} key
key = do
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap of
Maybe DocId
Nothing -> forall a. Maybe a
Nothing
Just DocId
docid ->
case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum DocId
docid) IntMap (DocInfo key field feature)
docIdMap of
Maybe (DocInfo key field feature)
Nothing -> forall a. (?callStack::CallStack) => String -> a
error String
"lookupDocKey: internal error"
Just (DocInfo key
_key DocTermIds field
doctermids DocFeatVals feature
_) -> forall a. a -> Maybe a
Just DocTermIds field
doctermids
lookupDocKeyDocId :: Ord key => SearchIndex key field feature -> key -> Maybe DocId
lookupDocKeyDocId :: forall key field feature.
Ord key =>
SearchIndex key field feature -> key -> Maybe DocId
lookupDocKeyDocId SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap} key
key = forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap
getTerm :: SearchIndex key field feature -> TermId -> Term
getTerm :: forall key field feature.
SearchIndex key field feature -> TermId -> Term
getTerm SearchIndex{IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap} TermId
termId =
case IntMap TermIdInfo
termIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum TermId
termId of TermIdInfo Term
term DocIdSet
_ -> Term
term
getTermId :: SearchIndex key field feature -> Term -> TermId
getTermId :: forall key field feature.
SearchIndex key field feature -> Term -> TermId
getTermId SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap} Term
term =
case Map Term TermInfo
termMap forall k a. Ord k => Map k a -> k -> a
Map.! Term
term of TermInfo TermId
termid DocIdSet
_ -> TermId
termid
getDocKey :: SearchIndex key field feature -> DocId -> key
getDocKey :: forall key field feature.
SearchIndex key field feature -> DocId -> key
getDocKey SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} DocId
docid =
case IntMap (DocInfo key field feature)
docIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum DocId
docid of
DocInfo key
dockey DocTermIds field
_ DocFeatVals feature
_ -> key
dockey
getDocTermIds :: SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds :: forall key field feature.
SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} DocId
docid =
case IntMap (DocInfo key field feature)
docIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum DocId
docid of
DocInfo key
_ DocTermIds field
doctermids DocFeatVals feature
_ -> DocTermIds field
doctermids
type DocTerms field = field -> [Term]
type DocFeatureValues feature = feature -> Float
insertDoc :: (Ord key, Ix field, Bounded field, Ix feature, Bounded feature) =>
key -> DocTerms field -> DocFeatureValues feature ->
SearchIndex key field feature -> SearchIndex key field feature
insertDoc :: forall key field feature.
(Ord key, Ix field, Bounded field, Ix feature, Bounded feature) =>
key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDoc key
key DocTerms field
userDocTerms DocFeatureValues feature
userDocFeats si :: SearchIndex key field feature
si@SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap}
| Just DocId
docid <- forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap
=
let oldTermsIds :: DocTermIds field
oldTermsIds = forall key field feature.
SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds SearchIndex key field feature
si DocId
docid
userDocTerms' :: DocTerms field
userDocTerms' = forall field.
(Ix field, Bounded field) =>
DocTerms field -> DocTerms field
memoiseDocTerms DocTerms field
userDocTerms
newTerms :: Set Term
newTerms = forall t. (Bounded t, Ix t) => DocTerms t -> Set Term
docTermSet DocTerms field
userDocTerms'
oldTerms :: Set Term
oldTerms = forall field key feature.
(Bounded field, Ix field) =>
SearchIndex key field feature -> DocTermIds field -> Set Term
docTermIdsTermSet SearchIndex key field feature
si DocTermIds field
oldTermsIds
delTerms :: Set Term
delTerms = Set Term
oldTerms forall a. Ord a => Set a -> Set a -> Set a
`Set.difference` Set Term
newTerms
addTerms :: Set Term
addTerms = Set Term
newTerms forall a. Ord a => Set a -> Set a -> Set a
`Set.difference` Set Term
oldTerms
in forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall field feature key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocId
-> key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocIdToDocEntry DocId
docid key
key DocTerms field
userDocTerms' DocFeatureValues feature
userDocFeats
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
addTerms) DocId
docid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
delTerms) DocId
docid
forall a b. (a -> b) -> a -> b
$ SearchIndex key field feature
si
| Bool
otherwise
=
let (SearchIndex key field feature
si', DocId
docid) = forall key field feature.
SearchIndex key field feature
-> (SearchIndex key field feature, DocId)
allocFreshDocId SearchIndex key field feature
si
userDocTerms' :: DocTerms field
userDocTerms' = forall field.
(Ix field, Bounded field) =>
DocTerms field -> DocTerms field
memoiseDocTerms DocTerms field
userDocTerms
addTerms :: Set Term
addTerms = forall t. (Bounded t, Ix t) => DocTerms t -> Set Term
docTermSet DocTerms field
userDocTerms'
in forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall field feature key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocId
-> key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocIdToDocEntry DocId
docid key
key DocTerms field
userDocTerms' DocFeatureValues feature
userDocFeats
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
Ord key =>
key
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocKeyToIdEntry key
key DocId
docid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
addTerms) DocId
docid
forall a b. (a -> b) -> a -> b
$ SearchIndex key field feature
si'
deleteDoc :: (Ord key, Ix field, Bounded field) =>
key ->
SearchIndex key field feature -> SearchIndex key field feature
deleteDoc :: forall key field feature.
(Ord key, Ix field, Bounded field) =>
key
-> SearchIndex key field feature -> SearchIndex key field feature
deleteDoc key
key si :: SearchIndex key field feature
si@SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap}
| Just DocId
docid <- forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap
= let oldTermsIds :: DocTermIds field
oldTermsIds = forall key field feature.
SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds SearchIndex key field feature
si DocId
docid
oldTerms :: Set Term
oldTerms = forall field key feature.
(Bounded field, Ix field) =>
SearchIndex key field feature -> DocTermIds field -> Set Term
docTermIdsTermSet SearchIndex key field feature
si DocTermIds field
oldTermsIds
in forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
Ord key =>
DocId
-> key
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteDocEntry DocId
docid key
key
forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
oldTerms) DocId
docid
forall a b. (a -> b) -> a -> b
$ SearchIndex key field feature
si
| Bool
otherwise = SearchIndex key field feature
si
memoiseDocTerms :: (Ix field, Bounded field) => DocTerms field -> DocTerms field
memoiseDocTerms :: forall field.
(Ix field, Bounded field) =>
DocTerms field -> DocTerms field
memoiseDocTerms DocTerms field
docTermsFn =
\field
field -> forall ix a. (Ix ix, Bounded ix) => Vector a -> ix -> a
vecIndexIx Vector [Term]
vec field
field
where
vec :: Vector [Term]
vec = forall ix a. (Ix ix, Bounded ix) => (ix -> a) -> Vector a
vecCreateIx DocTerms field
docTermsFn
docTermSet :: (Bounded t, Ix t) => DocTerms t -> Set.Set Term
docTermSet :: forall t. (Bounded t, Ix t) => DocTerms t -> Set Term
docTermSet DocTerms t
docterms =
forall (f :: * -> *) a. (Foldable f, Ord a) => f (Set a) -> Set a
Set.unions [ forall a. Ord a => [a] -> Set a
Set.fromList (DocTerms t
docterms t
field)
| t
field <- forall a. Ix a => (a, a) -> [a]
Ix.range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) ]
docTermIdsTermSet :: (Bounded field, Ix field) =>
SearchIndex key field feature ->
DocTermIds field -> Set.Set Term
docTermIdsTermSet :: forall field key feature.
(Bounded field, Ix field) =>
SearchIndex key field feature -> DocTermIds field -> Set Term
docTermIdsTermSet SearchIndex key field feature
si DocTermIds field
doctermids =
forall (f :: * -> *) a. (Foldable f, Ord a) => f (Set a) -> Set a
Set.unions [ forall a. Ord a => [a] -> Set a
Set.fromList [Term]
terms
| field
field <- forall a. Ix a => (a, a) -> [a]
Ix.range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
, let termids :: [TermId]
termids = forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> [TermId]
DocTermIds.fieldElems DocTermIds field
doctermids field
field
terms :: [Term]
terms = forall a b. (a -> b) -> [a] -> [b]
map (forall key field feature.
SearchIndex key field feature -> TermId -> Term
getTerm SearchIndex key field feature
si) [TermId]
termids ]
insertTermToDocIdEntry :: Term -> DocId ->
SearchIndex key field feature ->
SearchIndex key field feature
insertTermToDocIdEntry :: forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntry Term
term !DocId
docid si :: SearchIndex key field feature
si@SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap, IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap, TermId
nextTermId :: TermId
nextTermId :: forall key field feature. SearchIndex key field feature -> TermId
nextTermId} =
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
Maybe TermInfo
Nothing ->
let docIdSet' :: DocIdSet
docIdSet' = DocId -> DocIdSet
DocIdSet.singleton DocId
docid
!termInfo' :: TermInfo
termInfo' = TermId -> DocIdSet -> TermInfo
TermInfo TermId
nextTermId DocIdSet
docIdSet'
!termIdInfo' :: TermIdInfo
termIdInfo' = Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term DocIdSet
docIdSet'
in SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Term
term TermInfo
termInfo' Map Term TermInfo
termMap
, termIdMap :: IntMap TermIdInfo
termIdMap = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum TermId
nextTermId)
TermIdInfo
termIdInfo' IntMap TermIdInfo
termIdMap
, nextTermId :: TermId
nextTermId = forall a. Enum a => a -> a
succ TermId
nextTermId }
Just (TermInfo TermId
termId DocIdSet
docIdSet) ->
let docIdSet' :: DocIdSet
docIdSet' = DocId -> DocIdSet -> DocIdSet
DocIdSet.insert DocId
docid DocIdSet
docIdSet
!termInfo' :: TermInfo
termInfo' = TermId -> DocIdSet -> TermInfo
TermInfo TermId
termId DocIdSet
docIdSet'
!termIdInfo' :: TermIdInfo
termIdInfo' = Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term DocIdSet
docIdSet'
in SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Term
term TermInfo
termInfo' Map Term TermInfo
termMap
, termIdMap :: IntMap TermIdInfo
termIdMap = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum TermId
termId)
TermIdInfo
termIdInfo' IntMap TermIdInfo
termIdMap
}
insertTermToDocIdEntries :: [Term] -> DocId ->
SearchIndex key field feature ->
SearchIndex key field feature
insertTermToDocIdEntries :: forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntries [Term]
terms !DocId
docid SearchIndex key field feature
si =
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\SearchIndex key field feature
si' Term
term -> forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntry Term
term DocId
docid SearchIndex key field feature
si') SearchIndex key field feature
si [Term]
terms
deleteTermToDocIdEntry :: Term -> DocId ->
SearchIndex key field feature ->
SearchIndex key field feature
deleteTermToDocIdEntry :: forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntry Term
term !DocId
docid si :: SearchIndex key field feature
si@SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap, IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap} =
case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
Maybe TermInfo
Nothing -> SearchIndex key field feature
si
Just (TermInfo TermId
termId DocIdSet
docIdSet) ->
let docIdSet' :: DocIdSet
docIdSet' = DocId -> DocIdSet -> DocIdSet
DocIdSet.delete DocId
docid DocIdSet
docIdSet
!termInfo' :: TermInfo
termInfo' = TermId -> DocIdSet -> TermInfo
TermInfo TermId
termId DocIdSet
docIdSet'
!termIdInfo' :: TermIdInfo
termIdInfo' = Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term DocIdSet
docIdSet'
in if DocIdSet -> Bool
DocIdSet.null DocIdSet
docIdSet'
then SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap = forall k a. Ord k => k -> Map k a -> Map k a
Map.delete Term
term Map Term TermInfo
termMap
, termIdMap :: IntMap TermIdInfo
termIdMap = forall a. Int -> IntMap a -> IntMap a
IntMap.delete (forall a. Enum a => a -> Int
fromEnum TermId
termId) IntMap TermIdInfo
termIdMap }
else SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Term
term TermInfo
termInfo' Map Term TermInfo
termMap
, termIdMap :: IntMap TermIdInfo
termIdMap = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum TermId
termId)
TermIdInfo
termIdInfo' IntMap TermIdInfo
termIdMap
}
deleteTermToDocIdEntries :: [Term] -> DocId ->
SearchIndex key field feature ->
SearchIndex key field feature
deleteTermToDocIdEntries :: forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntries [Term]
terms !DocId
docid SearchIndex key field feature
si =
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\SearchIndex key field feature
si' Term
term -> forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntry Term
term DocId
docid SearchIndex key field feature
si') SearchIndex key field feature
si [Term]
terms
allocFreshDocId :: SearchIndex key field feature ->
(SearchIndex key field feature, DocId)
allocFreshDocId :: forall key field feature.
SearchIndex key field feature
-> (SearchIndex key field feature, DocId)
allocFreshDocId si :: SearchIndex key field feature
si@SearchIndex{DocId
nextDocId :: DocId
nextDocId :: forall key field feature. SearchIndex key field feature -> DocId
nextDocId} =
let !si' :: SearchIndex key field feature
si' = SearchIndex key field feature
si { nextDocId :: DocId
nextDocId = forall a. Enum a => a -> a
succ DocId
nextDocId }
in (SearchIndex key field feature
si', DocId
nextDocId)
insertDocKeyToIdEntry :: Ord key => key -> DocId ->
SearchIndex key field feature ->
SearchIndex key field feature
insertDocKeyToIdEntry :: forall key field feature.
Ord key =>
key
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocKeyToIdEntry key
dockey !DocId
docid si :: SearchIndex key field feature
si@SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap} =
SearchIndex key field feature
si { docKeyMap :: Map key DocId
docKeyMap = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert key
dockey DocId
docid Map key DocId
docKeyMap }
insertDocIdToDocEntry :: (Ix field, Bounded field,
Ix feature, Bounded feature) =>
DocId -> key ->
DocTerms field ->
DocFeatureValues feature ->
SearchIndex key field feature ->
SearchIndex key field feature
insertDocIdToDocEntry :: forall field feature key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocId
-> key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocIdToDocEntry !DocId
docid key
dockey DocTerms field
userdocterms DocFeatureValues feature
userdocfeats
si :: SearchIndex key field feature
si@SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} =
let doctermids :: DocTermIds field
doctermids = forall field.
(Ix field, Bounded field) =>
(field -> [TermId]) -> DocTermIds field
DocTermIds.create (forall a b. (a -> b) -> [a] -> [b]
map (forall key field feature.
SearchIndex key field feature -> Term -> TermId
getTermId SearchIndex key field feature
si) forall b c a. (b -> c) -> (a -> b) -> a -> c
. DocTerms field
userdocterms)
docfeatvals :: DocFeatVals feature
docfeatvals= forall feature.
(Ix feature, Bounded feature) =>
(feature -> Float) -> DocFeatVals feature
DocFeatVals.create DocFeatureValues feature
userdocfeats
!docinfo :: DocInfo key field feature
docinfo = forall key field feature.
key
-> DocTermIds field
-> DocFeatVals feature
-> DocInfo key field feature
DocInfo key
dockey DocTermIds field
doctermids DocFeatVals feature
docfeatvals
in SearchIndex key field feature
si { docIdMap :: IntMap (DocInfo key field feature)
docIdMap = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum DocId
docid) DocInfo key field feature
docinfo IntMap (DocInfo key field feature)
docIdMap }
deleteDocEntry :: Ord key => DocId -> key ->
SearchIndex key field feature -> SearchIndex key field feature
deleteDocEntry :: forall key field feature.
Ord key =>
DocId
-> key
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteDocEntry DocId
docid key
key si :: SearchIndex key field feature
si@SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap, Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap} =
SearchIndex key field feature
si { docIdMap :: IntMap (DocInfo key field feature)
docIdMap = forall a. Int -> IntMap a -> IntMap a
IntMap.delete (forall a. Enum a => a -> Int
fromEnum DocId
docid) IntMap (DocInfo key field feature)
docIdMap
, docKeyMap :: Map key DocId
docKeyMap = forall k a. Ord k => k -> Map k a -> Map k a
Map.delete key
key Map key DocId
docKeyMap }
lookupRange :: Ord k => (k, k) -> Map k v -> [v]
lookupRange :: forall k v. Ord k => (k, k) -> Map k v -> [v]
lookupRange (k
lb, k
ub) Map k v
m =
let (Map k v
_, Maybe v
mv, Map k v
gt) = forall k a. Ord k => k -> Map k a -> (Map k a, Maybe a, Map k a)
Map.splitLookup k
lb Map k v
m
(Map k v
between, Map k v
_) = forall k a. Ord k => k -> Map k a -> (Map k a, Map k a)
Map.split k
ub Map k v
gt
in case Maybe v
mv of
Just v
v -> v
v forall a. a -> [a] -> [a]
: forall k a. Map k a -> [a]
Map.elems Map k v
between
Maybe v
Nothing -> forall k a. Map k a -> [a]
Map.elems Map k v
between
lookupPrefix :: Text -> Map Text v -> [v]
lookupPrefix :: forall v. Term -> Map Term v -> [v]
lookupPrefix Term
t Map Term v
_ | Term -> Bool
T.null Term
t = []
lookupPrefix Term
t Map Term v
m = forall k v. Ord k => (k, k) -> Map k v -> [v]
lookupRange (Term
t, Term -> Term
prefixUpperBound Term
t) Map Term v
m
prefixUpperBound :: Text -> Text
prefixUpperBound :: Term -> Term
prefixUpperBound = Term -> Term
succLast forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Term -> Term
T.dropWhileEnd (forall a. Eq a => a -> a -> Bool
== forall a. Bounded a => a
maxBound)
where
succLast :: Term -> Term
succLast Term
t = Term -> Term
T.init Term
t Term -> Char -> Term
`T.snoc` forall a. Enum a => a -> a
succ (Term -> Char
T.last Term
t)