{-# LANGUAGE RecordWildCards, BangPatterns, ScopedTypeVariables #-}
module Data.SearchEngine.BM25F (
score,
Context(..),
FeatureFunction(..),
Doc(..),
scoreTermsBulk,
Explanation(..),
explain,
) where
import Data.Ix
import Data.Array.Unboxed
data Context term field feature = Context {
forall term field feature. Context term field feature -> Int
numDocsTotal :: !Int,
forall term field feature.
Context term field feature -> field -> Float
avgFieldLength :: field -> Float,
forall term field feature.
Context term field feature -> term -> Int
numDocsWithTerm :: term -> Int,
forall term field feature. Context term field feature -> Float
paramK1 :: !Float,
forall term field feature.
Context term field feature -> field -> Float
paramB :: field -> Float,
forall term field feature.
Context term field feature -> field -> Float
fieldWeight :: field -> Float,
forall term field feature.
Context term field feature -> feature -> Float
featureWeight :: feature -> Float,
forall term field feature.
Context term field feature -> feature -> FeatureFunction
featureFunction :: feature -> FeatureFunction
}
data Doc term field feature = Doc {
forall term field feature. Doc term field feature -> field -> Int
docFieldLength :: field -> Int,
forall term field feature.
Doc term field feature -> field -> term -> Int
docFieldTermFrequency :: field -> term -> Int,
forall term field feature.
Doc term field feature -> feature -> Float
docFeatureValue :: feature -> Float
}
score :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature ->
Doc term field feature -> [term] -> Float
score :: forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> [term] -> Float
score Context term field feature
ctx Doc term field feature
doc [term]
terms =
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (forall a b. (a -> b) -> [a] -> [b]
map (forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc) [term]
terms)
forall a. Num a => a -> a -> a
+ forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (forall a b. (a -> b) -> [a] -> [b]
map (forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc) [feature]
features)
where
features :: [feature]
features = forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
weightedTermScore :: (Ix field, Bounded field) =>
Context term field feature ->
Doc term field feature -> term -> Float
weightedTermScore :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc term
t =
forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t forall a. Num a => a -> a -> a
* Float
tf'
forall a. Fractional a => a -> a -> a
/ (Float
k1 forall a. Num a => a -> a -> a
+ Float
tf')
where
tf' :: Float
tf' = forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedDocTermFrequency Context term field feature
ctx Doc term field feature
doc term
t
k1 :: Float
k1 = forall term field feature. Context term field feature -> Float
paramK1 Context term field feature
ctx
weightIDF :: Context term field feature -> term -> Float
weightIDF :: forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t =
forall a. Floating a => a -> a
log ((Float
n forall a. Num a => a -> a -> a
- Float
n_t forall a. Num a => a -> a -> a
+ Float
0.5) forall a. Fractional a => a -> a -> a
/ (Float
n_t forall a. Num a => a -> a -> a
+ Float
0.5))
where
n :: Float
n = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall term field feature. Context term field feature -> Int
numDocsTotal Context term field feature
ctx)
n_t :: Float
n_t = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall term field feature.
Context term field feature -> term -> Int
numDocsWithTerm Context term field feature
ctx term
t)
weightedDocTermFrequency :: (Ix field, Bounded field) =>
Context term field feature ->
Doc term field feature -> term -> Float
weightedDocTermFrequency :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedDocTermFrequency Context term field feature
ctx Doc term field feature
doc term
t =
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ Float
w_f forall a. Num a => a -> a -> a
* Float
tf_f forall a. Fractional a => a -> a -> a
/ Float
_B_f
| field
field <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
, let w_f :: Float
w_f = forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
field
tf_f :: Float
tf_f = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall term field feature.
Doc term field feature -> field -> term -> Int
docFieldTermFrequency Doc term field feature
doc field
field term
t)
_B_f :: Float
_B_f = forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field
, Bool -> Bool
not (forall a. RealFloat a => a -> Bool
isNaN Float
_B_f)
]
lengthNorm :: Context term field feature ->
Doc term field feature -> field -> Float
lengthNorm :: forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field =
(Float
1forall a. Num a => a -> a -> a
-Float
b_f) forall a. Num a => a -> a -> a
+ Float
b_f forall a. Num a => a -> a -> a
* Float
sl_f forall a. Fractional a => a -> a -> a
/ Float
avgsl_f
where
b_f :: Float
b_f = forall term field feature.
Context term field feature -> field -> Float
paramB Context term field feature
ctx field
field
sl_f :: Float
sl_f = forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall term field feature. Doc term field feature -> field -> Int
docFieldLength Doc term field feature
doc field
field)
avgsl_f :: Float
avgsl_f = forall term field feature.
Context term field feature -> field -> Float
avgFieldLength Context term field feature
ctx field
field
weightedNonTermScore :: (Ix feature, Bounded feature) =>
Context term field feature ->
Doc term field feature -> feature -> Float
weightedNonTermScore :: forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc feature
feature =
Float
w_f forall a. Num a => a -> a -> a
* Float -> Float
_V_f Float
f_f
where
w_f :: Float
w_f = forall term field feature.
Context term field feature -> feature -> Float
featureWeight Context term field feature
ctx feature
feature
_V_f :: Float -> Float
_V_f = FeatureFunction -> Float -> Float
applyFeatureFunction (forall term field feature.
Context term field feature -> feature -> FeatureFunction
featureFunction Context term field feature
ctx feature
feature)
f_f :: Float
f_f = forall term field feature.
Doc term field feature -> feature -> Float
docFeatureValue Doc term field feature
doc feature
feature
data FeatureFunction
= LogarithmicFunction Float
| RationalFunction Float
| SigmoidFunction Float Float
applyFeatureFunction :: FeatureFunction -> (Float -> Float)
applyFeatureFunction :: FeatureFunction -> Float -> Float
applyFeatureFunction (LogarithmicFunction Float
p1) = \Float
fi -> forall a. Floating a => a -> a
log (Float
p1 forall a. Num a => a -> a -> a
+ Float
fi)
applyFeatureFunction (RationalFunction Float
p1) = \Float
fi -> Float
fi forall a. Fractional a => a -> a -> a
/ (Float
p1 forall a. Num a => a -> a -> a
+ Float
fi)
applyFeatureFunction (SigmoidFunction Float
p1 Float
p2) = \Float
fi -> Float
1 forall a. Fractional a => a -> a -> a
/ (Float
p1 forall a. Num a => a -> a -> a
+ forall a. Floating a => a -> a
exp (-Float
fi forall a. Num a => a -> a -> a
* Float
p2))
scoreTermsBulk :: forall field term feature. (Ix field, Bounded field) =>
Context term field feature ->
Doc term field feature ->
(term -> (field -> Int) -> Float)
scoreTermsBulk :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> (field -> Int) -> Float
scoreTermsBulk Context term field feature
ctx Doc term field feature
doc =
\term
t field -> Int
tFreq ->
let !tf' :: Float
tf' = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ UArray field Float
wforall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f forall a. Num a => a -> a -> a
* Float
tf_f forall a. Fractional a => a -> a -> a
/ UArray field Float
_Bforall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f
| field
f <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
, let tf_f :: Float
tf_f = forall a b. (Integral a, Num b) => a -> b
fromIntegral (field -> Int
tFreq field
f)
_B_f :: Float
_B_f = UArray field Float
_Bforall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f
, Bool -> Bool
not (forall a. RealFloat a => a -> Bool
isNaN Float
_B_f)
]
in forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t forall a. Num a => a -> a -> a
* Float
tf'
forall a. Fractional a => a -> a -> a
/ (Float
k1 forall a. Num a => a -> a -> a
+ Float
tf')
where
!k1 :: Float
k1 = forall term field feature. Context term field feature -> Float
paramK1 Context term field feature
ctx
w, _B :: UArray field Float
!w :: UArray field Float
w = forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
[ (field
field, forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
field)
| field
field <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) ]
!_B :: UArray field Float
_B = forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
[ (field
field, forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field)
| field
field <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) ]
data Explanation field feature term = Explanation {
forall field feature term. Explanation field feature term -> Float
overallScore :: Float,
forall field feature term.
Explanation field feature term -> [(term, Float)]
termScores :: [(term, Float)],
forall field feature term.
Explanation field feature term -> [(feature, Float)]
nonTermScores :: [(feature, Float)],
forall field feature term.
Explanation field feature term -> [(term, [(field, Float)])]
termFieldScores :: [(term, [(field, Float)])]
}
deriving Int -> Explanation field feature term -> ShowS
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall field feature term.
(Show term, Show feature, Show field) =>
Int -> Explanation field feature term -> ShowS
forall field feature term.
(Show term, Show feature, Show field) =>
[Explanation field feature term] -> ShowS
forall field feature term.
(Show term, Show feature, Show field) =>
Explanation field feature term -> String
showList :: [Explanation field feature term] -> ShowS
$cshowList :: forall field feature term.
(Show term, Show feature, Show field) =>
[Explanation field feature term] -> ShowS
show :: Explanation field feature term -> String
$cshow :: forall field feature term.
(Show term, Show feature, Show field) =>
Explanation field feature term -> String
showsPrec :: Int -> Explanation field feature term -> ShowS
$cshowsPrec :: forall field feature term.
(Show term, Show feature, Show field) =>
Int -> Explanation field feature term -> ShowS
Show
instance Functor (Explanation field feature) where
fmap :: forall a b.
(a -> b)
-> Explanation field feature a -> Explanation field feature b
fmap a -> b
f e :: Explanation field feature a
e@Explanation{Float
[(feature, Float)]
[(a, Float)]
[(a, [(field, Float)])]
termFieldScores :: [(a, [(field, Float)])]
nonTermScores :: [(feature, Float)]
termScores :: [(a, Float)]
overallScore :: Float
termFieldScores :: forall field feature term.
Explanation field feature term -> [(term, [(field, Float)])]
nonTermScores :: forall field feature term.
Explanation field feature term -> [(feature, Float)]
termScores :: forall field feature term.
Explanation field feature term -> [(term, Float)]
overallScore :: forall field feature term. Explanation field feature term -> Float
..} =
Explanation field feature a
e {
termScores :: [(b, Float)]
termScores = [ (a -> b
f a
t, Float
s) | (a
t, Float
s) <- [(a, Float)]
termScores ],
termFieldScores :: [(b, [(field, Float)])]
termFieldScores = [ (a -> b
f a
t, [(field, Float)]
fs) | (a
t, [(field, Float)]
fs) <- [(a, [(field, Float)])]
termFieldScores ]
}
explain :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature ->
Doc term field feature -> [term] -> Explanation field feature term
explain :: forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature
-> [term]
-> Explanation field feature term
explain Context term field feature
ctx Doc term field feature
doc [term]
ts =
Explanation {Float
[(feature, Float)]
[(term, Float)]
[(term, [(field, Float)])]
termFieldScores :: [(term, [(field, Float)])]
nonTermScores :: [(feature, Float)]
termScores :: [(term, Float)]
overallScore :: Float
termFieldScores :: [(term, [(field, Float)])]
nonTermScores :: [(feature, Float)]
termScores :: [(term, Float)]
overallScore :: Float
..}
where
overallScore :: Float
overallScore = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(term, Float)]
termScores)
forall a. Num a => a -> a -> a
+ forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(feature, Float)]
nonTermScores)
termScores :: [(term, Float)]
termScores = [ (term
t, forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc term
t) | term
t <- [term]
ts ]
nonTermScores :: [(feature, Float)]
nonTermScores = [ (feature
feature, forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc feature
feature)
| feature
feature <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) ]
termFieldScores :: [(term, [(field, Float)])]
termFieldScores =
[ (term
t, [(field, Float)]
fieldScores)
| term
t <- [term]
ts
, let fieldScores :: [(field, Float)]
fieldScores =
[ (field
f, forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx' Doc term field feature
doc term
t)
| field
f <- forall a. Ix a => (a, a) -> [a]
range (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound)
, let ctx' :: Context term field feature
ctx' = Context term field feature
ctx { fieldWeight :: field -> Float
fieldWeight = forall {a}. (Ix a, Bounded a) => a -> field -> Float
fieldWeightOnly field
f }
]
]
fieldWeightOnly :: a -> field -> Float
fieldWeightOnly a
f field
f' | forall {a} {a}.
(Ix a, Ix a, Bounded a, Bounded a) =>
a -> a -> Bool
sameField a
f field
f' = forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
f'
| Bool
otherwise = Float
0
sameField :: a -> a -> Bool
sameField a
f a
f' = forall a. Ix a => (a, a) -> a -> Int
index (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) a
f
forall a. Eq a => a -> a -> Bool
== forall a. Ix a => (a, a) -> a -> Int
index (forall a. Bounded a => a
minBound, forall a. Bounded a => a
maxBound) a
f'