{-# LANGUAGE BangPatterns, NamedFieldPuns, RecordWildCards #-}
module Data.SearchEngine.Query (
query,
ResultsFilter(..),
queryExplain,
BM25F.Explanation(..),
setRankParams,
relevanceScore,
indexDocToBM25Doc,
expandTransformedQueryTerm,
) where
import Data.SearchEngine.Types
import qualified Data.SearchEngine.SearchIndex as SI
import qualified Data.SearchEngine.DocIdSet as DocIdSet
import qualified Data.SearchEngine.DocTermIds as DocTermIds
import qualified Data.SearchEngine.DocFeatVals as DocFeatVals
import qualified Data.SearchEngine.BM25F as BM25F
import Data.Ix
import Data.List
import Data.Function
import Data.Maybe
query :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[Term] -> [key]
query :: forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature -> [Term] -> [key]
query se :: SearchEngine doc key field feature
se@SearchEngine{ SearchIndex key field feature
searchIndex :: forall doc key field feature.
SearchEngine doc key field feature -> SearchIndex key field feature
searchIndex :: SearchIndex key field feature
searchIndex,
searchRankParams :: forall doc key field feature.
SearchEngine doc key field feature
-> SearchRankParameters field feature
searchRankParams = SearchRankParameters{Float
Int
field -> Float
feature -> Float
feature -> FeatureFunction
paramAutosuggestPostfilterLimit :: forall field feature. SearchRankParameters field feature -> Int
paramAutosuggestPrefilterLimit :: forall field feature. SearchRankParameters field feature -> Int
paramResultsetHardLimit :: forall field feature. SearchRankParameters field feature -> Int
paramResultsetSoftLimit :: forall field feature. SearchRankParameters field feature -> Int
paramFeatureFunctions :: forall field feature.
SearchRankParameters field feature -> feature -> FeatureFunction
paramFeatureWeights :: forall field feature.
SearchRankParameters field feature -> feature -> Float
paramFieldWeights :: forall field feature.
SearchRankParameters field feature -> field -> Float
paramB :: forall field feature.
SearchRankParameters field feature -> field -> Float
paramK1 :: forall field feature. SearchRankParameters field feature -> Float
paramAutosuggestPostfilterLimit :: Int
paramAutosuggestPrefilterLimit :: Int
paramResultsetHardLimit :: Int
paramResultsetSoftLimit :: Int
paramFeatureFunctions :: feature -> FeatureFunction
paramFeatureWeights :: feature -> Float
paramFieldWeights :: field -> Float
paramB :: field -> Float
paramK1 :: Float
..} }
[Term]
terms =
let
lookupTerms :: [Term]
lookupTerms :: [Term]
lookupTerms = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (forall field doc key feature.
(Ix field, Bounded field) =>
SearchEngine doc key field feature -> Term -> [Term]
expandTransformedQueryTerm SearchEngine doc key field feature
se) [Term]
terms
rawresults :: [Maybe (TermId, DocIdSet)]
rawresults :: [Maybe (TermId, DocIdSet)]
rawresults = forall a b. (a -> b) -> [a] -> [b]
map (forall key field feature.
SearchIndex key field feature -> Term -> Maybe (TermId, DocIdSet)
SI.lookupTerm SearchIndex key field feature
searchIndex) [Term]
lookupTerms
termids :: [TermId]
docidsets :: [DocIdSet]
([TermId]
termids, [DocIdSet]
docidsets) = forall a b. [(a, b)] -> ([a], [b])
unzip (forall a. [Maybe a] -> [a]
catMaybes [Maybe (TermId, DocIdSet)]
rawresults)
unrankedResults :: DocIdSet
unrankedResults :: DocIdSet
unrankedResults = Int -> Int -> [DocIdSet] -> DocIdSet
pruneRelevantResults
Int
paramResultsetSoftLimit
Int
paramResultsetHardLimit
[DocIdSet]
docidsets
in forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature -> [TermId] -> [DocId] -> [key]
rankResults SearchEngine doc key field feature
se [TermId]
termids (DocIdSet -> [DocId]
DocIdSet.toList DocIdSet
unrankedResults)
expandTransformedQueryTerm :: (Ix field, Bounded field) =>
SearchEngine doc key field feature ->
Term -> [Term]
expandTransformedQueryTerm :: forall field doc key feature.
(Ix field, Bounded field) =>
SearchEngine doc key field feature -> Term -> [Term]
expandTransformedQueryTerm SearchEngine{SearchConfig doc key field feature
searchConfig :: forall doc key field feature.
SearchEngine doc key field feature
-> SearchConfig doc key field feature
searchConfig :: SearchConfig doc key field feature
searchConfig} Term
term =
forall a. Eq a => [a] -> [a]
nub [ field -> Term
transformForField field
field
| let transformForField :: field -> Term
transformForField = forall doc key field feature.
SearchConfig doc key field feature -> Term -> field -> Term
transformQueryTerm SearchConfig doc key field feature
searchConfig Term
term
, field
field <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) ]
rankResults :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] -> [DocId] -> [key]
rankResults :: forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature -> [TermId] -> [DocId] -> [key]
rankResults se :: SearchEngine doc key field feature
se@SearchEngine{SearchIndex key field feature
searchIndex :: SearchIndex key field feature
searchIndex :: forall doc key field feature.
SearchEngine doc key field feature -> SearchIndex key field feature
searchIndex} [TermId]
queryTerms [DocId]
docids =
forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd
forall a b. (a -> b) -> a -> b
$ forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` forall a b. (a, b) -> a
fst)
[ (forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature
-> [TermId] -> DocTermIds field -> DocFeatVals feature -> Float
relevanceScore SearchEngine doc key field feature
se [TermId]
queryTerms DocTermIds field
doctermids DocFeatVals feature
docfeatvals, key
dockey)
| DocId
docid <- [DocId]
docids
, let (key
dockey, DocTermIds field
doctermids, DocFeatVals feature
docfeatvals) = forall key field feature.
SearchIndex key field feature
-> DocId -> (key, DocTermIds field, DocFeatVals feature)
SI.lookupDocId SearchIndex key field feature
searchIndex DocId
docid ]
relevanceScore :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] -> DocTermIds field -> DocFeatVals feature -> Float
relevanceScore :: forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature
-> [TermId] -> DocTermIds field -> DocFeatVals feature -> Float
relevanceScore SearchEngine{Context TermId field feature
bm25Context :: forall doc key field feature.
SearchEngine doc key field feature -> Context TermId field feature
bm25Context :: Context TermId field feature
bm25Context} [TermId]
queryTerms DocTermIds field
doctermids DocFeatVals feature
docfeatvals =
forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> [term] -> Float
BM25F.score Context TermId field feature
bm25Context Doc TermId field feature
doc [TermId]
queryTerms
where
doc :: Doc TermId field feature
doc = forall field feature.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocTermIds field -> DocFeatVals feature -> Doc TermId field feature
indexDocToBM25Doc DocTermIds field
doctermids DocFeatVals feature
docfeatvals
indexDocToBM25Doc :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
DocTermIds field ->
DocFeatVals feature ->
BM25F.Doc TermId field feature
indexDocToBM25Doc :: forall field feature.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocTermIds field -> DocFeatVals feature -> Doc TermId field feature
indexDocToBM25Doc DocTermIds field
doctermids DocFeatVals feature
docfeatvals =
BM25F.Doc {
docFieldLength :: field -> Int
BM25F.docFieldLength = forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> Int
DocTermIds.fieldLength DocTermIds field
doctermids,
docFieldTermFrequency :: field -> TermId -> Int
BM25F.docFieldTermFrequency = forall field.
(Ix field, Bounded field) =>
DocTermIds field -> field -> TermId -> Int
DocTermIds.fieldTermCount DocTermIds field
doctermids,
docFeatureValue :: feature -> Float
BM25F.docFeatureValue = forall feature.
(Ix feature, Bounded feature) =>
DocFeatVals feature -> feature -> Float
DocFeatVals.featureValue DocFeatVals feature
docfeatvals
}
pruneRelevantResults :: Int -> Int -> [DocIdSet] -> DocIdSet
pruneRelevantResults :: Int -> Int -> [DocIdSet] -> DocIdSet
pruneRelevantResults Int
softLimit Int
hardLimit =
DocIdSet -> [DocIdSet] -> DocIdSet
go DocIdSet
DocIdSet.empty forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` DocIdSet -> Int
DocIdSet.size)
where
go :: DocIdSet -> [DocIdSet] -> DocIdSet
go !DocIdSet
acc [] = DocIdSet
acc
go !DocIdSet
acc (DocIdSet
d:[DocIdSet]
ds)
| DocIdSet -> Bool
DocIdSet.null DocIdSet
acc = DocIdSet -> [DocIdSet] -> DocIdSet
go DocIdSet
d [DocIdSet]
ds
| Int
size forall a. Ord a => a -> a -> Bool
> Int
hardLimit = DocIdSet
acc
| Int
size forall a. Ord a => a -> a -> Bool
> Int
softLimit = DocIdSet -> DocIdSet -> DocIdSet
DocIdSet.union DocIdSet
acc DocIdSet
d
| Bool
otherwise = DocIdSet -> [DocIdSet] -> DocIdSet
go (DocIdSet -> DocIdSet -> DocIdSet
DocIdSet.union DocIdSet
acc DocIdSet
d) [DocIdSet]
ds
where
size :: Int
size = DocIdSet -> Int
DocIdSet.size DocIdSet
acc forall a. Num a => a -> a -> a
+ DocIdSet -> Int
DocIdSet.size DocIdSet
d
queryExplain :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[Term] -> [(BM25F.Explanation field feature Term, key)]
queryExplain :: forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature
-> [Term] -> [(Explanation field feature Term, key)]
queryExplain se :: SearchEngine doc key field feature
se@SearchEngine{ SearchIndex key field feature
searchIndex :: SearchIndex key field feature
searchIndex :: forall doc key field feature.
SearchEngine doc key field feature -> SearchIndex key field feature
searchIndex,
searchConfig :: forall doc key field feature.
SearchEngine doc key field feature
-> SearchConfig doc key field feature
searchConfig = SearchConfig{Term -> field -> Term
transformQueryTerm :: Term -> field -> Term
transformQueryTerm :: forall doc key field feature.
SearchConfig doc key field feature -> Term -> field -> Term
transformQueryTerm},
searchRankParams :: forall doc key field feature.
SearchEngine doc key field feature
-> SearchRankParameters field feature
searchRankParams = SearchRankParameters{Float
Int
field -> Float
feature -> Float
feature -> FeatureFunction
paramAutosuggestPostfilterLimit :: Int
paramAutosuggestPrefilterLimit :: Int
paramResultsetHardLimit :: Int
paramResultsetSoftLimit :: Int
paramFeatureFunctions :: feature -> FeatureFunction
paramFeatureWeights :: feature -> Float
paramFieldWeights :: field -> Float
paramB :: field -> Float
paramK1 :: Float
paramAutosuggestPostfilterLimit :: forall field feature. SearchRankParameters field feature -> Int
paramAutosuggestPrefilterLimit :: forall field feature. SearchRankParameters field feature -> Int
paramResultsetHardLimit :: forall field feature. SearchRankParameters field feature -> Int
paramResultsetSoftLimit :: forall field feature. SearchRankParameters field feature -> Int
paramFeatureFunctions :: forall field feature.
SearchRankParameters field feature -> feature -> FeatureFunction
paramFeatureWeights :: forall field feature.
SearchRankParameters field feature -> feature -> Float
paramFieldWeights :: forall field feature.
SearchRankParameters field feature -> field -> Float
paramB :: forall field feature.
SearchRankParameters field feature -> field -> Float
paramK1 :: forall field feature. SearchRankParameters field feature -> Float
..} }
[Term]
terms =
let lookupTerms :: [Term]
lookupTerms :: [Term]
lookupTerms = [ Term
term'
| Term
term <- [Term]
terms
, let transformForField :: field -> Term
transformForField = Term -> field -> Term
transformQueryTerm Term
term
, Term
term' <- forall a. Eq a => [a] -> [a]
nub [ field -> Term
transformForField field
field
| field
field <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) ]
]
rawresults :: [Maybe (TermId, DocIdSet)]
rawresults :: [Maybe (TermId, DocIdSet)]
rawresults = forall a b. (a -> b) -> [a] -> [b]
map (forall key field feature.
SearchIndex key field feature -> Term -> Maybe (TermId, DocIdSet)
SI.lookupTerm SearchIndex key field feature
searchIndex) [Term]
lookupTerms
termids :: [TermId]
docidsets :: [DocIdSet]
([TermId]
termids, [DocIdSet]
docidsets) = forall a b. [(a, b)] -> ([a], [b])
unzip (forall a. [Maybe a] -> [a]
catMaybes [Maybe (TermId, DocIdSet)]
rawresults)
unrankedResults :: DocIdSet
unrankedResults :: DocIdSet
unrankedResults = Int -> Int -> [DocIdSet] -> DocIdSet
pruneRelevantResults
Int
paramResultsetSoftLimit
Int
paramResultsetHardLimit
[DocIdSet]
docidsets
in forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature
-> [TermId] -> [DocId] -> [(Explanation field feature Term, key)]
rankExplainResults SearchEngine doc key field feature
se [TermId]
termids (DocIdSet -> [DocId]
DocIdSet.toList DocIdSet
unrankedResults)
rankExplainResults :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] ->
[DocId] ->
[(BM25F.Explanation field feature Term, key)]
rankExplainResults :: forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature
-> [TermId] -> [DocId] -> [(Explanation field feature Term, key)]
rankExplainResults se :: SearchEngine doc key field feature
se@SearchEngine{SearchIndex key field feature
searchIndex :: SearchIndex key field feature
searchIndex :: forall doc key field feature.
SearchEngine doc key field feature -> SearchIndex key field feature
searchIndex} [TermId]
queryTerms [DocId]
docids =
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. Ord a => a -> a -> Ordering
compare forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (forall field feature term. Explanation field feature term -> Float
BM25F.overallScore forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst))
[ (forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature
-> [TermId]
-> DocTermIds field
-> DocFeatVals feature
-> Explanation field feature Term
explainRelevanceScore SearchEngine doc key field feature
se [TermId]
queryTerms DocTermIds field
doctermids DocFeatVals feature
docfeatvals, key
dockey)
| DocId
docid <- [DocId]
docids
, let (key
dockey, DocTermIds field
doctermids, DocFeatVals feature
docfeatvals) = forall key field feature.
SearchIndex key field feature
-> DocId -> (key, DocTermIds field, DocFeatVals feature)
SI.lookupDocId SearchIndex key field feature
searchIndex DocId
docid ]
explainRelevanceScore :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature ->
[TermId] ->
DocTermIds field ->
DocFeatVals feature ->
BM25F.Explanation field feature Term
explainRelevanceScore :: forall field feature doc key.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
SearchEngine doc key field feature
-> [TermId]
-> DocTermIds field
-> DocFeatVals feature
-> Explanation field feature Term
explainRelevanceScore SearchEngine{Context TermId field feature
bm25Context :: Context TermId field feature
bm25Context :: forall doc key field feature.
SearchEngine doc key field feature -> Context TermId field feature
bm25Context, SearchIndex key field feature
searchIndex :: SearchIndex key field feature
searchIndex :: forall doc key field feature.
SearchEngine doc key field feature -> SearchIndex key field feature
searchIndex}
[TermId]
queryTerms DocTermIds field
doctermids DocFeatVals feature
docfeatvals =
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall key field feature.
SearchIndex key field feature -> TermId -> Term
SI.getTerm SearchIndex key field feature
searchIndex) (forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature
-> [term]
-> Explanation field feature term
BM25F.explain Context TermId field feature
bm25Context Doc TermId field feature
doc [TermId]
queryTerms)
where
doc :: Doc TermId field feature
doc = forall field feature.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
DocTermIds field -> DocFeatVals feature -> Doc TermId field feature
indexDocToBM25Doc DocTermIds field
doctermids DocFeatVals feature
docfeatvals
setRankParams :: SearchRankParameters field feature ->
SearchEngine doc key field feature ->
SearchEngine doc key field feature
setRankParams :: forall field feature doc key.
SearchRankParameters field feature
-> SearchEngine doc key field feature
-> SearchEngine doc key field feature
setRankParams params :: SearchRankParameters field feature
params@SearchRankParameters{Float
Int
field -> Float
feature -> Float
feature -> FeatureFunction
paramAutosuggestPostfilterLimit :: Int
paramAutosuggestPrefilterLimit :: Int
paramResultsetHardLimit :: Int
paramResultsetSoftLimit :: Int
paramFeatureFunctions :: feature -> FeatureFunction
paramFeatureWeights :: feature -> Float
paramFieldWeights :: field -> Float
paramB :: field -> Float
paramK1 :: Float
paramAutosuggestPostfilterLimit :: forall field feature. SearchRankParameters field feature -> Int
paramAutosuggestPrefilterLimit :: forall field feature. SearchRankParameters field feature -> Int
paramResultsetHardLimit :: forall field feature. SearchRankParameters field feature -> Int
paramResultsetSoftLimit :: forall field feature. SearchRankParameters field feature -> Int
paramFeatureFunctions :: forall field feature.
SearchRankParameters field feature -> feature -> FeatureFunction
paramFeatureWeights :: forall field feature.
SearchRankParameters field feature -> feature -> Float
paramFieldWeights :: forall field feature.
SearchRankParameters field feature -> field -> Float
paramB :: forall field feature.
SearchRankParameters field feature -> field -> Float
paramK1 :: forall field feature. SearchRankParameters field feature -> Float
..} SearchEngine doc key field feature
se =
SearchEngine doc key field feature
se {
searchRankParams :: SearchRankParameters field feature
searchRankParams = SearchRankParameters field feature
params,
bm25Context :: Context TermId field feature
bm25Context = (forall doc key field feature.
SearchEngine doc key field feature -> Context TermId field feature
bm25Context SearchEngine doc key field feature
se) {
paramK1 :: Float
BM25F.paramK1 = Float
paramK1,
paramB :: field -> Float
BM25F.paramB = field -> Float
paramB,
fieldWeight :: field -> Float
BM25F.fieldWeight = field -> Float
paramFieldWeights,
featureWeight :: feature -> Float
BM25F.featureWeight = feature -> Float
paramFeatureWeights,
featureFunction :: feature -> FeatureFunction
BM25F.featureFunction = feature -> FeatureFunction
paramFeatureFunctions
}
}
data ResultsFilter key = NoFilter
| FilterPredicate (key -> Bool)
| FilterBulkPredicate ([key] -> [Bool])