{-# LANGUAGE BangPatterns, NamedFieldPuns #-}

module Data.SearchEngine.SearchIndex (
    SearchIndex,
    Term,
    TermId,
    DocId,

    emptySearchIndex,
    insertDoc,
    deleteDoc,

    docCount,
    lookupTerm,
    lookupTermsByPrefix,
    lookupTermId,
    lookupDocId,
    lookupDocKey,
    lookupDocKeyDocId,

    getTerm,
    getDocKey,

    invariant,
  ) where

import Data.SearchEngine.DocIdSet (DocIdSet, DocId)
import qualified Data.SearchEngine.DocIdSet as DocIdSet
import Data.SearchEngine.DocTermIds (DocTermIds, TermId, vecIndexIx, vecCreateIx)
import qualified Data.SearchEngine.DocTermIds as DocTermIds
import Data.SearchEngine.DocFeatVals (DocFeatVals)
import qualified Data.SearchEngine.DocFeatVals as DocFeatVals

import Data.Ix (Ix)
import qualified Data.Ix as Ix
import Data.Map (Map)
import qualified Data.Map as Map
import Data.IntMap (IntMap)
import qualified Data.IntMap as IntMap
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Data.List (foldl')

import Control.Exception (assert)

-- | Terms are short strings, usually whole words.
--
type Term = Text

-- | The search index is essentially a many-to-many mapping between documents
-- and terms. Each document contains many terms and each term occurs in many
-- documents. It is a bidirectional mapping as we need to support lookups in
-- both directions.
--
-- Documents are identified by a key (in Ord) while terms are text values.
-- Inside the index however we assign compact numeric ids to both documents and
-- terms. The advantage of this is a much more compact in-memory representation
-- and the disadvantage is greater complexity. In particular it means we have
-- to manage bidirectional mappings between document keys and ids, and between
-- terms and term ids.
--
-- So the mappings we maintain can be depicted as:
--
-- >  Term   <-- 1:1 -->   TermId
-- >          \              ^
-- >           \             |
-- >           1:many    many:many
-- >                \        |
-- >                 \->     v
-- > DocKey  <-- 1:1 -->   DocId
--
-- For efficiency, these details are exposed in the interface. In particular
-- the mapping from TermId to many DocIds is exposed via a 'DocIdSet',
-- and the mapping from DocIds to TermIds is exposed via 'DocTermIds'.
--
-- The main reason we need to keep the DocId -> TermId is to allow for
-- efficient incremental updates.
--
data SearchIndex key field feature = SearchIndex {
       -- the indexes
       forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap           :: !(Map Term TermInfo),
       forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap         :: !(IntMap TermIdInfo),
       forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap          :: !(IntMap (DocInfo key field feature)),
       forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap         :: !(Map key DocId),

       -- auto-increment key counters
       forall key field feature. SearchIndex key field feature -> TermId
nextTermId        :: TermId,
       forall key field feature. SearchIndex key field feature -> DocId
nextDocId         :: DocId
     }
  deriving Int -> SearchIndex key field feature -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall key field feature.
Show key =>
Int -> SearchIndex key field feature -> ShowS
forall key field feature.
Show key =>
[SearchIndex key field feature] -> ShowS
forall key field feature.
Show key =>
SearchIndex key field feature -> String
showList :: [SearchIndex key field feature] -> ShowS
$cshowList :: forall key field feature.
Show key =>
[SearchIndex key field feature] -> ShowS
show :: SearchIndex key field feature -> String
$cshow :: forall key field feature.
Show key =>
SearchIndex key field feature -> String
showsPrec :: Int -> SearchIndex key field feature -> ShowS
$cshowsPrec :: forall key field feature.
Show key =>
Int -> SearchIndex key field feature -> ShowS
Show

data TermInfo = TermInfo !TermId !DocIdSet
  deriving Int -> TermInfo -> ShowS
[TermInfo] -> ShowS
TermInfo -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TermInfo] -> ShowS
$cshowList :: [TermInfo] -> ShowS
show :: TermInfo -> String
$cshow :: TermInfo -> String
showsPrec :: Int -> TermInfo -> ShowS
$cshowsPrec :: Int -> TermInfo -> ShowS
Show

data TermIdInfo = TermIdInfo !Term !DocIdSet
  deriving (Int -> TermIdInfo -> ShowS
[TermIdInfo] -> ShowS
TermIdInfo -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TermIdInfo] -> ShowS
$cshowList :: [TermIdInfo] -> ShowS
show :: TermIdInfo -> String
$cshow :: TermIdInfo -> String
showsPrec :: Int -> TermIdInfo -> ShowS
$cshowsPrec :: Int -> TermIdInfo -> ShowS
Show, TermIdInfo -> TermIdInfo -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TermIdInfo -> TermIdInfo -> Bool
$c/= :: TermIdInfo -> TermIdInfo -> Bool
== :: TermIdInfo -> TermIdInfo -> Bool
$c== :: TermIdInfo -> TermIdInfo -> Bool
Eq)

data DocInfo key field feature = DocInfo !key !(DocTermIds field)
                                              !(DocFeatVals feature)
  deriving Int -> DocInfo key field feature -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall key field feature.
Show key =>
Int -> DocInfo key field feature -> ShowS
forall key field feature.
Show key =>
[DocInfo key field feature] -> ShowS
forall key field feature.
Show key =>
DocInfo key field feature -> String
showList :: [DocInfo key field feature] -> ShowS
$cshowList :: forall key field feature.
Show key =>
[DocInfo key field feature] -> ShowS
show :: DocInfo key field feature -> String
$cshow :: forall key field feature.
Show key =>
DocInfo key field feature -> String
showsPrec :: Int -> DocInfo key field feature -> ShowS
$cshowsPrec :: forall key field feature.
Show key =>
Int -> DocInfo key field feature -> ShowS
Show


-----------------------
-- SearchIndex basics
--

emptySearchIndex :: SearchIndex key field feature
emptySearchIndex :: forall key field feature. SearchIndex key field feature
emptySearchIndex =
    forall key field feature.
Map Term TermInfo
-> IntMap TermIdInfo
-> IntMap (DocInfo key field feature)
-> Map key DocId
-> TermId
-> DocId
-> SearchIndex key field feature
SearchIndex
      forall k a. Map k a
Map.empty
      forall a. IntMap a
IntMap.empty
      forall a. IntMap a
IntMap.empty
      forall k a. Map k a
Map.empty
      forall a. Bounded a => a
minBound
      forall a. Bounded a => a
minBound

checkInvariant :: (Ord key, Ix field, Bounded field) =>
                  SearchIndex key field feature -> SearchIndex key field feature
checkInvariant :: forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant SearchIndex key field feature
si = forall a. (?callStack::CallStack) => Bool -> a -> a
assert (forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> Bool
invariant SearchIndex key field feature
si) SearchIndex key field feature
si

invariant :: (Ord key, Ix field, Bounded field) =>
             SearchIndex key field feature -> Bool
invariant :: forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> Bool
invariant SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap, IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap, Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap, IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} =
      forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum TermId
termId) IntMap TermIdInfo
termIdMap
            forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just (Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term DocIdSet
docidset)
          | (Term
term, (TermInfo TermId
termId DocIdSet
docidset)) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map Term TermInfo
termMap ]
  Bool -> Bool -> Bool
&&  forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
              Just (TermInfo TermId
termId' DocIdSet
docidset') -> forall a. Enum a => Int -> a
toEnum Int
termId forall a. Eq a => a -> a -> Bool
== TermId
termId'
                                                   Bool -> Bool -> Bool
&& DocIdSet
docidset forall a. Eq a => a -> a -> Bool
== DocIdSet
docidset'
              Maybe TermInfo
Nothing                           -> Bool
False
          | (Int
termId, (TermIdInfo Term
term DocIdSet
docidset)) <- forall a. IntMap a -> [(Int, a)]
IntMap.assocs IntMap TermIdInfo
termIdMap ]
  Bool -> Bool -> Bool
&&  forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum DocId
docId) IntMap (DocInfo key field feature)
docIdMap of
              Just (DocInfo key
docKey' DocTermIds field
_ DocFeatVals feature
_) -> key
docKey forall a. Eq a => a -> a -> Bool
== key
docKey'
              Maybe (DocInfo key field feature)
Nothing                  -> Bool
False
          | (key
docKey, DocId
docId) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map key DocId
docKeyMap ]
  Bool -> Bool -> Bool
&&  forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
docKey Map key DocId
docKeyMap forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just (forall a. Enum a => Int -> a
toEnum Int
docId)
          | (Int
docId, DocInfo key
docKey DocTermIds field
_ DocFeatVals feature
_) <- forall a. IntMap a -> [(Int, a)]
IntMap.assocs IntMap (DocInfo key field feature)
docIdMap ]
  Bool -> Bool -> Bool
&&  forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ DocIdSet -> Bool
DocIdSet.invariant DocIdSet
docIdSet
          | (Term
_term, (TermInfo TermId
_ DocIdSet
docIdSet)) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map Term TermInfo
termMap ]
  Bool -> Bool -> Bool
&&  forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (\field
field -> forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> TermId -> Int
DocTermIds.fieldTermCount DocTermIds field
docterms field
field TermId
termId forall a. Ord a => a -> a -> Bool
> Int
0) [field]
fields
          | (Term
_term, (TermInfo TermId
termId DocIdSet
docIdSet)) <- forall k a. Map k a -> [(k, a)]
Map.assocs Map Term TermInfo
termMap
          , DocId
docId <- DocIdSet -> [DocId]
DocIdSet.toList DocIdSet
docIdSet
          , let DocInfo key
_ DocTermIds field
docterms DocFeatVals feature
_ = IntMap (DocInfo key field feature)
docIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum DocId
docId ]
  Bool -> Bool -> Bool
&&  forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ forall a. Int -> IntMap a -> Bool
IntMap.member (forall a. Enum a => a -> Int
fromEnum TermId
termid) IntMap TermIdInfo
termIdMap
          | (Int
_docId, DocInfo key
_ DocTermIds field
docTerms DocFeatVals feature
_) <- forall a. IntMap a -> [(Int, a)]
IntMap.assocs IntMap (DocInfo key field feature)
docIdMap
          , field
field <- [field]
fields
          , TermId
termid <- forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> [TermId]
DocTermIds.fieldElems DocTermIds field
docTerms field
field ]
  where
    fields :: [field]
fields = forall a. Ix a => (a, a) -> [a]
Ix.range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)


-------------------
-- Lookups
--

docCount :: SearchIndex key field feature -> Int
docCount :: forall key field feature. SearchIndex key field feature -> Int
docCount SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} = forall a. IntMap a -> Int
IntMap.size IntMap (DocInfo key field feature)
docIdMap

lookupTerm :: SearchIndex key field feature -> Term -> Maybe (TermId, DocIdSet)
lookupTerm :: forall key field feature.
SearchIndex key field feature -> Term -> Maybe (TermId, DocIdSet)
lookupTerm SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap} Term
term =
    case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
      Maybe TermInfo
Nothing                         -> forall a. Maybe a
Nothing
      Just (TermInfo TermId
termid DocIdSet
docidset) -> forall a. a -> Maybe a
Just (TermId
termid, DocIdSet
docidset)

lookupTermsByPrefix :: SearchIndex key field feature ->
                       Term -> [(TermId, DocIdSet)]
lookupTermsByPrefix :: forall key field feature.
SearchIndex key field feature -> Term -> [(TermId, DocIdSet)]
lookupTermsByPrefix SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap} Term
term =
    [ (TermId
termid, DocIdSet
docidset)
    | (TermInfo TermId
termid DocIdSet
docidset) <- forall v. Term -> Map Term v -> [v]
lookupPrefix Term
term Map Term TermInfo
termMap ]

lookupTermId :: SearchIndex key field feature -> TermId -> DocIdSet
lookupTermId :: forall key field feature.
SearchIndex key field feature -> TermId -> DocIdSet
lookupTermId SearchIndex{IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap} TermId
termid =
    case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum TermId
termid) IntMap TermIdInfo
termIdMap of
      Maybe TermIdInfo
Nothing -> forall a. (?callStack::CallStack) => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"lookupTermId: not found " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show TermId
termid
      Just (TermIdInfo Term
_ DocIdSet
docidset) -> DocIdSet
docidset

lookupDocId :: SearchIndex key field feature ->
               DocId -> (key, DocTermIds field, DocFeatVals feature)
lookupDocId :: forall key field feature.
SearchIndex key field feature
-> DocId -> (key, DocTermIds field, DocFeatVals feature)
lookupDocId SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} DocId
docid =
    case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum DocId
docid) IntMap (DocInfo key field feature)
docIdMap of
      Maybe (DocInfo key field feature)
Nothing                                   -> forall {a}. a
errNotFound
      Just (DocInfo key
key DocTermIds field
doctermids DocFeatVals feature
docfeatvals) -> (key
key, DocTermIds field
doctermids, DocFeatVals feature
docfeatvals)
  where
    errNotFound :: a
errNotFound = forall a. (?callStack::CallStack) => String -> a
error forall a b. (a -> b) -> a -> b
$ String
"lookupDocId: not found " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show DocId
docid

lookupDocKey :: Ord key => SearchIndex key field feature ->
                key -> Maybe (DocTermIds field)
lookupDocKey :: forall key field feature.
Ord key =>
SearchIndex key field feature -> key -> Maybe (DocTermIds field)
lookupDocKey SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap, IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} key
key = do
    case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap of
      Maybe DocId
Nothing    -> forall a. Maybe a
Nothing
      Just DocId
docid ->
        case forall a. Int -> IntMap a -> Maybe a
IntMap.lookup (forall a. Enum a => a -> Int
fromEnum DocId
docid) IntMap (DocInfo key field feature)
docIdMap of
          Maybe (DocInfo key field feature)
Nothing                          -> forall a. (?callStack::CallStack) => String -> a
error String
"lookupDocKey: internal error"
          Just (DocInfo key
_key DocTermIds field
doctermids DocFeatVals feature
_) -> forall a. a -> Maybe a
Just DocTermIds field
doctermids

lookupDocKeyDocId :: Ord key => SearchIndex key field feature -> key -> Maybe DocId
lookupDocKeyDocId :: forall key field feature.
Ord key =>
SearchIndex key field feature -> key -> Maybe DocId
lookupDocKeyDocId SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap} key
key = forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap


getTerm :: SearchIndex key field feature -> TermId -> Term
getTerm :: forall key field feature.
SearchIndex key field feature -> TermId -> Term
getTerm SearchIndex{IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap} TermId
termId =
    case IntMap TermIdInfo
termIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum TermId
termId of TermIdInfo Term
term DocIdSet
_ -> Term
term

getTermId :: SearchIndex key field feature -> Term -> TermId
getTermId :: forall key field feature.
SearchIndex key field feature -> Term -> TermId
getTermId SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap} Term
term =
    case Map Term TermInfo
termMap forall k a. Ord k => Map k a -> k -> a
Map.! Term
term of TermInfo TermId
termid DocIdSet
_ -> TermId
termid

getDocKey :: SearchIndex key field feature -> DocId -> key
getDocKey :: forall key field feature.
SearchIndex key field feature -> DocId -> key
getDocKey SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} DocId
docid =
    case IntMap (DocInfo key field feature)
docIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum DocId
docid of
      DocInfo key
dockey DocTermIds field
_ DocFeatVals feature
_ -> key
dockey

getDocTermIds :: SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds :: forall key field feature.
SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} DocId
docid =
    case IntMap (DocInfo key field feature)
docIdMap forall a. IntMap a -> Int -> a
IntMap.! forall a. Enum a => a -> Int
fromEnum DocId
docid of
      DocInfo key
_ DocTermIds field
doctermids DocFeatVals feature
_ -> DocTermIds field
doctermids

--------------------
-- Insert & delete
--

-- Procedure for adding a new doc...
-- (key, field -> [Term])
-- alloc docid for key
-- add term occurences for docid (include rev map for termid)
-- construct indexdoc now that we have all the term -> termid entries
-- insert indexdoc

-- Procedure for updating a doc...
-- (key, field -> [Term])
-- find docid for key
-- lookup old terms for docid (using termid rev map)
-- calc term occurrences to add, term occurrences to delete
-- add new term occurrences, delete old term occurrences
-- construct indexdoc now that we have all the term -> termid entries
-- insert indexdoc

-- Procedure for deleting a doc...
-- (key, field -> [Term])
-- find docid for key
-- lookup old terms for docid (using termid rev map)
-- delete old term occurrences
-- delete indexdoc

-- | This is the representation for documents to be added to the index.
-- Documents may 
--
type DocTerms         field   = field   -> [Term]
type DocFeatureValues feature = feature -> Float

insertDoc :: (Ord key, Ix field, Bounded field, Ix feature, Bounded feature) =>
              key -> DocTerms field -> DocFeatureValues feature ->
              SearchIndex key field feature -> SearchIndex key field feature
insertDoc :: forall key field feature.
(Ord key, Ix field, Bounded field, Ix feature, Bounded feature) =>
key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDoc key
key DocTerms field
userDocTerms DocFeatureValues feature
userDocFeats si :: SearchIndex key field feature
si@SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap}
  | Just DocId
docid <- forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap
  = -- Some older version of the doc is already present in the index,
    -- So we keep its docid. Now have to update the doc itself
    -- and update the terms by removing old ones and adding new ones.
    let oldTermsIds :: DocTermIds field
oldTermsIds   = forall key field feature.
SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds SearchIndex key field feature
si DocId
docid
        userDocTerms' :: DocTerms field
userDocTerms' = forall field.
(Ix field, Bounded field) =>
DocTerms field -> DocTerms field
memoiseDocTerms DocTerms field
userDocTerms
        newTerms :: Set Term
newTerms      = forall t. (Bounded t, Ix t) => DocTerms t -> Set Term
docTermSet DocTerms field
userDocTerms'
        oldTerms :: Set Term
oldTerms      = forall field key feature.
(Bounded field, Ix field) =>
SearchIndex key field feature -> DocTermIds field -> Set Term
docTermIdsTermSet SearchIndex key field feature
si DocTermIds field
oldTermsIds
        -- We optimise for the typical case of significant overlap between
        -- the terms in the old and new versions of the document.
        delTerms :: Set Term
delTerms      = Set Term
oldTerms forall a. Ord a => Set a -> Set a -> Set a
`Set.difference` Set Term
newTerms
        addTerms :: Set Term
addTerms      = Set Term
newTerms forall a. Ord a => Set a -> Set a -> Set a
`Set.difference` Set Term
oldTerms

     -- Note: adding the doc relies on all the terms being in the termMap
     -- already, so we first add all the term occurences for the docid.
     in forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall field feature key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocId
-> key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocIdToDocEntry DocId
docid key
key DocTerms field
userDocTerms' DocFeatureValues feature
userDocFeats
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
addTerms) DocId
docid
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
delTerms) DocId
docid
      forall a b. (a -> b) -> a -> b
$ SearchIndex key field feature
si

  | Bool
otherwise
  = -- We're dealing with a new doc, so allocate a docid for the key
    let (SearchIndex key field feature
si', DocId
docid)  = forall key field feature.
SearchIndex key field feature
-> (SearchIndex key field feature, DocId)
allocFreshDocId SearchIndex key field feature
si
        userDocTerms' :: DocTerms field
userDocTerms' = forall field.
(Ix field, Bounded field) =>
DocTerms field -> DocTerms field
memoiseDocTerms DocTerms field
userDocTerms
        addTerms :: Set Term
addTerms      = forall t. (Bounded t, Ix t) => DocTerms t -> Set Term
docTermSet DocTerms field
userDocTerms'

     -- Note: adding the doc relies on all the terms being in the termMap
     -- already, so we first add all the term occurences for the docid.
     in forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall field feature key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocId
-> key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocIdToDocEntry DocId
docid key
key DocTerms field
userDocTerms' DocFeatureValues feature
userDocFeats
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
Ord key =>
key
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocKeyToIdEntry key
key DocId
docid
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
addTerms) DocId
docid
      forall a b. (a -> b) -> a -> b
$ SearchIndex key field feature
si'

deleteDoc :: (Ord key, Ix field, Bounded field) =>
             key ->
             SearchIndex key field feature -> SearchIndex key field feature
deleteDoc :: forall key field feature.
(Ord key, Ix field, Bounded field) =>
key
-> SearchIndex key field feature -> SearchIndex key field feature
deleteDoc key
key si :: SearchIndex key field feature
si@SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap}
  | Just DocId
docid <- forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup key
key Map key DocId
docKeyMap
  = let oldTermsIds :: DocTermIds field
oldTermsIds = forall key field feature.
SearchIndex key field feature -> DocId -> DocTermIds field
getDocTermIds SearchIndex key field feature
si DocId
docid
        oldTerms :: Set Term
oldTerms    = forall field key feature.
(Bounded field, Ix field) =>
SearchIndex key field feature -> DocTermIds field -> Set Term
docTermIdsTermSet SearchIndex key field feature
si DocTermIds field
oldTermsIds
     in forall key field feature.
(Ord key, Ix field, Bounded field) =>
SearchIndex key field feature -> SearchIndex key field feature
checkInvariant
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
Ord key =>
DocId
-> key
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteDocEntry DocId
docid key
key
      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntries (forall a. Set a -> [a]
Set.toList Set Term
oldTerms) DocId
docid
      forall a b. (a -> b) -> a -> b
$ SearchIndex key field feature
si
  
  | Bool
otherwise = SearchIndex key field feature
si


----------------------------------
-- Insert & delete support utils
--


memoiseDocTerms :: (Ix field, Bounded field) => DocTerms field -> DocTerms field
memoiseDocTerms :: forall field.
(Ix field, Bounded field) =>
DocTerms field -> DocTerms field
memoiseDocTerms DocTerms field
docTermsFn =
    \field
field -> forall ix a. (Ix ix, Bounded ix) => Vector a -> ix -> a
vecIndexIx Vector [Term]
vec field
field
  where
    vec :: Vector [Term]
vec = forall ix a. (Ix ix, Bounded ix) => (ix -> a) -> Vector a
vecCreateIx DocTerms field
docTermsFn

docTermSet :: (Bounded t, Ix t) => DocTerms t -> Set.Set Term
docTermSet :: forall t. (Bounded t, Ix t) => DocTerms t -> Set Term
docTermSet DocTerms t
docterms =
    forall (f :: * -> *) a. (Foldable f, Ord a) => f (Set a) -> Set a
Set.unions [ forall a. Ord a => [a] -> Set a
Set.fromList (DocTerms t
docterms t
field)
               | t
field <- forall a. Ix a => (a, a) -> [a]
Ix.range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) ]

docTermIdsTermSet :: (Bounded field, Ix field) =>
                     SearchIndex key field feature ->
                     DocTermIds field -> Set.Set Term
docTermIdsTermSet :: forall field key feature.
(Bounded field, Ix field) =>
SearchIndex key field feature -> DocTermIds field -> Set Term
docTermIdsTermSet SearchIndex key field feature
si DocTermIds field
doctermids =
    forall (f :: * -> *) a. (Foldable f, Ord a) => f (Set a) -> Set a
Set.unions [ forall a. Ord a => [a] -> Set a
Set.fromList [Term]
terms
               | field
field <- forall a. Ix a => (a, a) -> [a]
Ix.range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
               , let termids :: [TermId]
termids = forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> [TermId]
DocTermIds.fieldElems DocTermIds field
doctermids field
field
                     terms :: [Term]
terms   = forall a b. (a -> b) -> [a] -> [b]
map (forall key field feature.
SearchIndex key field feature -> TermId -> Term
getTerm SearchIndex key field feature
si) [TermId]
termids ]

--
-- The Term <-> DocId mapping
--

-- | Add an entry into the 'Term' to 'DocId' mapping.
insertTermToDocIdEntry :: Term -> DocId -> 
                          SearchIndex key field feature ->
                          SearchIndex key field feature
insertTermToDocIdEntry :: forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntry Term
term !DocId
docid si :: SearchIndex key field feature
si@SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap, IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap, TermId
nextTermId :: TermId
nextTermId :: forall key field feature. SearchIndex key field feature -> TermId
nextTermId} =
    case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
      Maybe TermInfo
Nothing ->
        let docIdSet' :: DocIdSet
docIdSet'    = DocId -> DocIdSet
DocIdSet.singleton DocId
docid
            !termInfo' :: TermInfo
termInfo'   = TermId -> DocIdSet -> TermInfo
TermInfo TermId
nextTermId DocIdSet
docIdSet'
            !termIdInfo' :: TermIdInfo
termIdInfo' = Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term     DocIdSet
docIdSet'
         in SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap    = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Term
term TermInfo
termInfo' Map Term TermInfo
termMap
               , termIdMap :: IntMap TermIdInfo
termIdMap  = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum TermId
nextTermId)
                                            TermIdInfo
termIdInfo' IntMap TermIdInfo
termIdMap
               , nextTermId :: TermId
nextTermId = forall a. Enum a => a -> a
succ TermId
nextTermId }

      Just (TermInfo TermId
termId DocIdSet
docIdSet) ->
        let docIdSet' :: DocIdSet
docIdSet'    = DocId -> DocIdSet -> DocIdSet
DocIdSet.insert DocId
docid DocIdSet
docIdSet
            !termInfo' :: TermInfo
termInfo'   = TermId -> DocIdSet -> TermInfo
TermInfo TermId
termId DocIdSet
docIdSet'
            !termIdInfo' :: TermIdInfo
termIdInfo' = Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term DocIdSet
docIdSet'
         in SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap   = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Term
term TermInfo
termInfo' Map Term TermInfo
termMap
               , termIdMap :: IntMap TermIdInfo
termIdMap = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum TermId
termId)
                                           TermIdInfo
termIdInfo' IntMap TermIdInfo
termIdMap
               }

-- | Add multiple entries into the 'Term' to 'DocId' mapping: many terms that
-- map to the same document.
insertTermToDocIdEntries :: [Term] -> DocId ->
                            SearchIndex key field feature ->
                            SearchIndex key field feature
insertTermToDocIdEntries :: forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntries [Term]
terms !DocId
docid SearchIndex key field feature
si =
    forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\SearchIndex key field feature
si' Term
term -> forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertTermToDocIdEntry Term
term DocId
docid SearchIndex key field feature
si') SearchIndex key field feature
si [Term]
terms

-- | Delete an entry from the 'Term' to 'DocId' mapping.
deleteTermToDocIdEntry :: Term -> DocId ->
                          SearchIndex key field feature ->
                          SearchIndex key field feature
deleteTermToDocIdEntry :: forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntry Term
term !DocId
docid si :: SearchIndex key field feature
si@SearchIndex{Map Term TermInfo
termMap :: Map Term TermInfo
termMap :: forall key field feature.
SearchIndex key field feature -> Map Term TermInfo
termMap, IntMap TermIdInfo
termIdMap :: IntMap TermIdInfo
termIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap TermIdInfo
termIdMap} =
    case  forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Term
term Map Term TermInfo
termMap of
      Maybe TermInfo
Nothing -> SearchIndex key field feature
si
      Just (TermInfo TermId
termId DocIdSet
docIdSet) ->
        let docIdSet' :: DocIdSet
docIdSet'    = DocId -> DocIdSet -> DocIdSet
DocIdSet.delete DocId
docid DocIdSet
docIdSet
            !termInfo' :: TermInfo
termInfo'   = TermId -> DocIdSet -> TermInfo
TermInfo TermId
termId DocIdSet
docIdSet'
            !termIdInfo' :: TermIdInfo
termIdInfo' = Term -> DocIdSet -> TermIdInfo
TermIdInfo Term
term DocIdSet
docIdSet'
        in if DocIdSet -> Bool
DocIdSet.null DocIdSet
docIdSet'
            then SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap = forall k a. Ord k => k -> Map k a -> Map k a
Map.delete Term
term Map Term TermInfo
termMap
                    , termIdMap :: IntMap TermIdInfo
termIdMap = forall a. Int -> IntMap a -> IntMap a
IntMap.delete (forall a. Enum a => a -> Int
fromEnum TermId
termId) IntMap TermIdInfo
termIdMap }
            else SearchIndex key field feature
si { termMap :: Map Term TermInfo
termMap   = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Term
term TermInfo
termInfo' Map Term TermInfo
termMap
                    , termIdMap :: IntMap TermIdInfo
termIdMap = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum TermId
termId)
                                                TermIdInfo
termIdInfo' IntMap TermIdInfo
termIdMap
                    }

-- | Delete multiple entries from the 'Term' to 'DocId' mapping: many terms
-- that map to the same document.
deleteTermToDocIdEntries :: [Term] -> DocId ->
                            SearchIndex key field feature ->
                            SearchIndex key field feature
deleteTermToDocIdEntries :: forall key field feature.
[Term]
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntries [Term]
terms !DocId
docid SearchIndex key field feature
si =
    forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\SearchIndex key field feature
si' Term
term -> forall key field feature.
Term
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteTermToDocIdEntry Term
term DocId
docid SearchIndex key field feature
si') SearchIndex key field feature
si [Term]
terms

--
-- The DocId <-> Doc mapping
--

allocFreshDocId :: SearchIndex key field feature ->
                  (SearchIndex key field feature, DocId)
allocFreshDocId :: forall key field feature.
SearchIndex key field feature
-> (SearchIndex key field feature, DocId)
allocFreshDocId si :: SearchIndex key field feature
si@SearchIndex{DocId
nextDocId :: DocId
nextDocId :: forall key field feature. SearchIndex key field feature -> DocId
nextDocId} =
    let !si' :: SearchIndex key field feature
si' = SearchIndex key field feature
si { nextDocId :: DocId
nextDocId = forall a. Enum a => a -> a
succ DocId
nextDocId }
     in (SearchIndex key field feature
si', DocId
nextDocId)

insertDocKeyToIdEntry :: Ord key => key -> DocId ->
                         SearchIndex key field feature ->
                         SearchIndex key field feature
insertDocKeyToIdEntry :: forall key field feature.
Ord key =>
key
-> DocId
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocKeyToIdEntry key
dockey !DocId
docid si :: SearchIndex key field feature
si@SearchIndex{Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap} =
    SearchIndex key field feature
si { docKeyMap :: Map key DocId
docKeyMap = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert key
dockey DocId
docid Map key DocId
docKeyMap }

insertDocIdToDocEntry :: (Ix field, Bounded field,
                          Ix feature, Bounded feature) =>
                         DocId -> key ->
                         DocTerms field ->
                         DocFeatureValues feature ->
                         SearchIndex key field feature ->
                         SearchIndex key field feature
insertDocIdToDocEntry :: forall field feature key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocId
-> key
-> DocTerms field
-> DocFeatureValues feature
-> SearchIndex key field feature
-> SearchIndex key field feature
insertDocIdToDocEntry !DocId
docid key
dockey DocTerms field
userdocterms DocFeatureValues feature
userdocfeats
                       si :: SearchIndex key field feature
si@SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap} =
    let doctermids :: DocTermIds field
doctermids = forall field.
(Ix field, Bounded field) =>
(field -> [TermId]) -> DocTermIds field
DocTermIds.create (forall a b. (a -> b) -> [a] -> [b]
map (forall key field feature.
SearchIndex key field feature -> Term -> TermId
getTermId SearchIndex key field feature
si) forall b c a. (b -> c) -> (a -> b) -> a -> c
. DocTerms field
userdocterms)
        docfeatvals :: DocFeatVals feature
docfeatvals= forall feature.
(Ix feature, Bounded feature) =>
(feature -> Float) -> DocFeatVals feature
DocFeatVals.create DocFeatureValues feature
userdocfeats
        !docinfo :: DocInfo key field feature
docinfo   = forall key field feature.
key
-> DocTermIds field
-> DocFeatVals feature
-> DocInfo key field feature
DocInfo key
dockey DocTermIds field
doctermids DocFeatVals feature
docfeatvals
     in SearchIndex key field feature
si { docIdMap :: IntMap (DocInfo key field feature)
docIdMap  = forall a. Int -> a -> IntMap a -> IntMap a
IntMap.insert (forall a. Enum a => a -> Int
fromEnum DocId
docid) DocInfo key field feature
docinfo IntMap (DocInfo key field feature)
docIdMap }

deleteDocEntry :: Ord key => DocId -> key ->
                  SearchIndex key field feature -> SearchIndex key field feature
deleteDocEntry :: forall key field feature.
Ord key =>
DocId
-> key
-> SearchIndex key field feature
-> SearchIndex key field feature
deleteDocEntry DocId
docid key
key si :: SearchIndex key field feature
si@SearchIndex{IntMap (DocInfo key field feature)
docIdMap :: IntMap (DocInfo key field feature)
docIdMap :: forall key field feature.
SearchIndex key field feature -> IntMap (DocInfo key field feature)
docIdMap, Map key DocId
docKeyMap :: Map key DocId
docKeyMap :: forall key field feature.
SearchIndex key field feature -> Map key DocId
docKeyMap} =
     SearchIndex key field feature
si { docIdMap :: IntMap (DocInfo key field feature)
docIdMap  = forall a. Int -> IntMap a -> IntMap a
IntMap.delete (forall a. Enum a => a -> Int
fromEnum DocId
docid) IntMap (DocInfo key field feature)
docIdMap
        , docKeyMap :: Map key DocId
docKeyMap = forall k a. Ord k => k -> Map k a -> Map k a
Map.delete key
key Map key DocId
docKeyMap }

--
-- Data.Map utils
--

-- Data.Map does not support prefix lookups directly (unlike a trie)
-- but we can implement it reasonably efficiently using split:

-- | Lookup values for a range of keys (inclusive lower bound and exclusive
-- upper bound)
--
lookupRange :: Ord k => (k, k) -> Map k v -> [v]
lookupRange :: forall k v. Ord k => (k, k) -> Map k v -> [v]
lookupRange (k
lb, k
ub) Map k v
m =
  let (Map k v
_, Maybe v
mv, Map k v
gt)  = forall k a. Ord k => k -> Map k a -> (Map k a, Maybe a, Map k a)
Map.splitLookup k
lb Map k v
m
      (Map k v
between, Map k v
_) = forall k a. Ord k => k -> Map k a -> (Map k a, Map k a)
Map.split       k
ub Map k v
gt
   in case Maybe v
mv of
        Just v
v  -> v
v forall a. a -> [a] -> [a]
: forall k a. Map k a -> [a]
Map.elems Map k v
between
        Maybe v
Nothing ->     forall k a. Map k a -> [a]
Map.elems Map k v
between

lookupPrefix :: Text -> Map Text v -> [v]
lookupPrefix :: forall v. Term -> Map Term v -> [v]
lookupPrefix Term
t Map Term v
_ | Term -> Bool
T.null Term
t = []
lookupPrefix Term
t Map Term v
m = forall k v. Ord k => (k, k) -> Map k v -> [v]
lookupRange (Term
t, Term -> Term
prefixUpperBound Term
t) Map Term v
m

prefixUpperBound :: Text -> Text
prefixUpperBound :: Term -> Term
prefixUpperBound = Term -> Term
succLast forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Term -> Term
T.dropWhileEnd (forall a. Eq a => a -> a -> Bool
== forall a. Bounded a => a
maxBound)
  where
    succLast :: Term -> Term
succLast Term
t = Term -> Term
T.init Term
t Term -> Char -> Term
`T.snoc` forall a. Enum a => a -> a
succ (Term -> Char
T.last Term
t)