{-# LANGUAGE RecordWildCards, BangPatterns, ScopedTypeVariables #-}
module Data.SearchEngine.BM25F (
    
    score,
    Context(..),
    FeatureFunction(..),
    Doc(..),
    
    scoreTermsBulk,
    
    Explanation(..),
    explain,
  ) where
import Data.Ix
import Data.Array.Unboxed
data Context term field feature = Context {
         forall term field feature. Context term field feature -> Int
numDocsTotal     :: !Int,
         forall term field feature.
Context term field feature -> field -> Float
avgFieldLength   :: field -> Float,
         forall term field feature.
Context term field feature -> term -> Int
numDocsWithTerm  :: term -> Int,
         forall term field feature. Context term field feature -> Float
paramK1          :: !Float,
         forall term field feature.
Context term field feature -> field -> Float
paramB           :: field -> Float,
         
         forall term field feature.
Context term field feature -> field -> Float
fieldWeight      :: field -> Float,
         forall term field feature.
Context term field feature -> feature -> Float
featureWeight    :: feature -> Float,
         forall term field feature.
Context term field feature -> feature -> FeatureFunction
featureFunction  :: feature -> FeatureFunction
       }
data Doc term field feature = Doc {
         forall term field feature. Doc term field feature -> field -> Int
docFieldLength        :: field -> Int,
         forall term field feature.
Doc term field feature -> field -> term -> Int
docFieldTermFrequency :: field -> term -> Int,
         forall term field feature.
Doc term field feature -> feature -> Float
docFeatureValue       :: feature -> Float
       }
score :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
         Context term field feature ->
         Doc term field feature -> [term] -> Float
score :: forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> [term] -> Float
score Context term field feature
ctx Doc term field feature
doc [term]
terms =
    [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((term -> Float) -> [term] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore    Context term field feature
ctx Doc term field feature
doc) [term]
terms)
  Float -> Float -> Float
forall a. Num a => a -> a -> a
+ [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum ((feature -> Float) -> [feature] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (Context term field feature
-> Doc term field feature -> feature -> Float
forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc) [feature]
features)
  where
    features :: [feature]
features = (feature, feature) -> [feature]
forall a. Ix a => (a, a) -> [a]
range (feature
forall a. Bounded a => a
minBound, feature
forall a. Bounded a => a
maxBound)
weightedTermScore :: (Ix field, Bounded field) =>
                     Context term field feature ->
                     Doc term field feature -> term -> Float
weightedTermScore :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc term
t =
    Context term field feature -> term -> Float
forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t Float -> Float -> Float
forall a. Num a => a -> a -> a
*     Float
tf'
                     Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
k1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
tf')
  where
    tf' :: Float
tf' = Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedDocTermFrequency Context term field feature
ctx Doc term field feature
doc term
t
    k1 :: Float
k1  = Context term field feature -> Float
forall term field feature. Context term field feature -> Float
paramK1 Context term field feature
ctx
weightIDF :: Context term field feature -> term -> Float
weightIDF :: forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t =
    Float -> Float
forall a. Floating a => a -> a
log ((Float
n Float -> Float -> Float
forall a. Num a => a -> a -> a
- Float
n_t Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
0.5) Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
n_t Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
0.5))
  where
    n :: Float
n   = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Context term field feature -> Int
forall term field feature. Context term field feature -> Int
numDocsTotal Context term field feature
ctx)
    n_t :: Float
n_t = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Context term field feature -> term -> Int
forall term field feature.
Context term field feature -> term -> Int
numDocsWithTerm Context term field feature
ctx term
t)
weightedDocTermFrequency :: (Ix field, Bounded field) =>
                            Context term field feature ->
                            Doc term field feature -> term -> Float
weightedDocTermFrequency :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedDocTermFrequency Context term field feature
ctx Doc term field feature
doc term
t =
    [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ Float
w_f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
tf_f Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
_B_f
        | field
field <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
        , let w_f :: Float
w_f  = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
field
              tf_f :: Float
tf_f = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Doc term field feature -> field -> term -> Int
forall term field feature.
Doc term field feature -> field -> term -> Int
docFieldTermFrequency Doc term field feature
doc field
field term
t)
              _B_f :: Float
_B_f = Context term field feature
-> Doc term field feature -> field -> Float
forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field
        , Bool -> Bool
not (Float -> Bool
forall a. RealFloat a => a -> Bool
isNaN Float
_B_f)
        ]
    
    
    
    
    
    
lengthNorm :: Context term field feature ->
              Doc term field feature -> field -> Float
lengthNorm :: forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field =
    (Float
1Float -> Float -> Float
forall a. Num a => a -> a -> a
-Float
b_f) Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
b_f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
sl_f Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ Float
avgsl_f
  where
    b_f :: Float
b_f     = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
paramB Context term field feature
ctx field
field
    sl_f :: Float
sl_f    = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Doc term field feature -> field -> Int
forall term field feature. Doc term field feature -> field -> Int
docFieldLength Doc term field feature
doc field
field)
    avgsl_f :: Float
avgsl_f = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
avgFieldLength Context term field feature
ctx field
field
weightedNonTermScore :: (Ix feature, Bounded feature) =>
                        Context term field feature ->
                        Doc term field feature -> feature -> Float
weightedNonTermScore :: forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc feature
feature =
    Float
w_f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float -> Float
_V_f Float
f_f
  where
    w_f :: Float
w_f  = Context term field feature -> feature -> Float
forall term field feature.
Context term field feature -> feature -> Float
featureWeight Context term field feature
ctx feature
feature
    _V_f :: Float -> Float
_V_f = FeatureFunction -> Float -> Float
applyFeatureFunction (Context term field feature -> feature -> FeatureFunction
forall term field feature.
Context term field feature -> feature -> FeatureFunction
featureFunction Context term field feature
ctx feature
feature)
    f_f :: Float
f_f  = Doc term field feature -> feature -> Float
forall term field feature.
Doc term field feature -> feature -> Float
docFeatureValue Doc term field feature
doc feature
feature
data FeatureFunction
   = LogarithmicFunction   Float 
   | RationalFunction      Float 
   | SigmoidFunction Float Float 
applyFeatureFunction :: FeatureFunction -> (Float -> Float)
applyFeatureFunction :: FeatureFunction -> Float -> Float
applyFeatureFunction (LogarithmicFunction Float
p1) = \Float
fi -> Float -> Float
forall a. Floating a => a -> a
log (Float
p1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
fi)
applyFeatureFunction (RationalFunction    Float
p1) = \Float
fi -> Float
fi Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
p1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
fi)
applyFeatureFunction (SigmoidFunction  Float
p1 Float
p2) = \Float
fi -> Float
1 Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
p1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float -> Float
forall a. Floating a => a -> a
exp (-Float
fi Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
p2))
scoreTermsBulk :: forall field term feature. (Ix field, Bounded field) =>
                  Context term field feature ->
                  Doc term field feature ->
                  (term -> (field -> Int) -> Float)
scoreTermsBulk :: forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> (field -> Int) -> Float
scoreTermsBulk Context term field feature
ctx Doc term field feature
doc = 
    
    
    \term
t field -> Int
tFreq ->
    let !tf' :: Float
tf' = [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ UArray field Float
wUArray field Float -> field -> Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
tf_f Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ UArray field Float
_BUArray field Float -> field -> Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f
                   | field
f <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
                   , let tf_f :: Float
tf_f = Int -> Float
forall a b. (Integral a, Num b) => a -> b
fromIntegral (field -> Int
tFreq field
f)
                         _B_f :: Float
_B_f = UArray field Float
_BUArray field Float -> field -> Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> i -> e
!field
f
                   , Bool -> Bool
not (Float -> Bool
forall a. RealFloat a => a -> Bool
isNaN Float
_B_f)
                   ]
     in Context term field feature -> term -> Float
forall term field feature.
Context term field feature -> term -> Float
weightIDF Context term field feature
ctx term
t Float -> Float -> Float
forall a. Num a => a -> a -> a
*     Float
tf'
                         Float -> Float -> Float
forall a. Fractional a => a -> a -> a
/ (Float
k1 Float -> Float -> Float
forall a. Num a => a -> a -> a
+ Float
tf')
  where
    
    
    !k1 :: Float
k1 = Context term field feature -> Float
forall term field feature. Context term field feature -> Float
paramK1 Context term field feature
ctx
    w, _B :: UArray field Float
    !w :: UArray field Float
w  = (field, field) -> [(field, Float)] -> UArray field Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
                [ (field
field, Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
field)
                | field
field <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound) ]
    !_B :: UArray field Float
_B = (field, field) -> [(field, Float)] -> UArray field Float
forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
(i, i) -> [(i, e)] -> a i e
array (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
                [ (field
field, Context term field feature
-> Doc term field feature -> field -> Float
forall term field feature.
Context term field feature
-> Doc term field feature -> field -> Float
lengthNorm Context term field feature
ctx Doc term field feature
doc field
field)
                | field
field <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound) ]
data Explanation field feature term = Explanation {
       
       
       forall field feature term. Explanation field feature term -> Float
overallScore  :: Float,
       
       
       
       forall field feature term.
Explanation field feature term -> [(term, Float)]
termScores    :: [(term, Float)],
       
       
       forall field feature term.
Explanation field feature term -> [(feature, Float)]
nonTermScores :: [(feature, Float)],
       
       
       
       
       
       
       
       
       
       
       forall field feature term.
Explanation field feature term -> [(term, [(field, Float)])]
termFieldScores :: [(term, [(field, Float)])]
     }
  deriving Int -> Explanation field feature term -> ShowS
[Explanation field feature term] -> ShowS
Explanation field feature term -> String
(Int -> Explanation field feature term -> ShowS)
-> (Explanation field feature term -> String)
-> ([Explanation field feature term] -> ShowS)
-> Show (Explanation field feature term)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall field feature term.
(Show term, Show feature, Show field) =>
Int -> Explanation field feature term -> ShowS
forall field feature term.
(Show term, Show feature, Show field) =>
[Explanation field feature term] -> ShowS
forall field feature term.
(Show term, Show feature, Show field) =>
Explanation field feature term -> String
$cshowsPrec :: forall field feature term.
(Show term, Show feature, Show field) =>
Int -> Explanation field feature term -> ShowS
showsPrec :: Int -> Explanation field feature term -> ShowS
$cshow :: forall field feature term.
(Show term, Show feature, Show field) =>
Explanation field feature term -> String
show :: Explanation field feature term -> String
$cshowList :: forall field feature term.
(Show term, Show feature, Show field) =>
[Explanation field feature term] -> ShowS
showList :: [Explanation field feature term] -> ShowS
Show
instance Functor (Explanation field feature) where
  fmap :: forall a b.
(a -> b)
-> Explanation field feature a -> Explanation field feature b
fmap a -> b
f e :: Explanation field feature a
e@Explanation{Float
[(feature, Float)]
[(a, Float)]
[(a, [(field, Float)])]
termScores :: forall field feature term.
Explanation field feature term -> [(term, Float)]
overallScore :: forall field feature term. Explanation field feature term -> Float
termFieldScores :: forall field feature term.
Explanation field feature term -> [(term, [(field, Float)])]
nonTermScores :: forall field feature term.
Explanation field feature term -> [(feature, Float)]
overallScore :: Float
termScores :: [(a, Float)]
nonTermScores :: [(feature, Float)]
termFieldScores :: [(a, [(field, Float)])]
..} =
    Explanation field feature a
e {
      termScores      = [ (f t, s)  | (t, s)  <- termScores ],
      termFieldScores = [ (f t, fs) | (t, fs) <- termFieldScores ]
    }
explain :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
           Context term field feature ->
           Doc term field feature -> [term] -> Explanation field feature term
explain :: forall field feature term.
(Ix field, Bounded field, Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature
-> [term]
-> Explanation field feature term
explain Context term field feature
ctx Doc term field feature
doc [term]
ts =
    Explanation {Float
[(feature, Float)]
[(term, Float)]
[(term, [(field, Float)])]
termScores :: [(term, Float)]
overallScore :: Float
termFieldScores :: [(term, [(field, Float)])]
nonTermScores :: [(feature, Float)]
overallScore :: Float
termScores :: [(term, Float)]
nonTermScores :: [(feature, Float)]
termFieldScores :: [(term, [(field, Float)])]
..}
  where
    overallScore :: Float
overallScore  = [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((term, Float) -> Float) -> [(term, Float)] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (term, Float) -> Float
forall a b. (a, b) -> b
snd [(term, Float)]
termScores)
                  Float -> Float -> Float
forall a. Num a => a -> a -> a
+ [Float] -> Float
forall a. Num a => [a] -> a
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (((feature, Float) -> Float) -> [(feature, Float)] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map (feature, Float) -> Float
forall a b. (a, b) -> b
snd [(feature, Float)]
nonTermScores)
    termScores :: [(term, Float)]
termScores    = [ (term
t, Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx Doc term field feature
doc term
t) | term
t <- [term]
ts ]
    nonTermScores :: [(feature, Float)]
nonTermScores = [ (feature
feature, Context term field feature
-> Doc term field feature -> feature -> Float
forall feature term field.
(Ix feature, Bounded feature) =>
Context term field feature
-> Doc term field feature -> feature -> Float
weightedNonTermScore Context term field feature
ctx Doc term field feature
doc feature
feature)
                    | feature
feature <- (feature, feature) -> [feature]
forall a. Ix a => (a, a) -> [a]
range (feature
forall a. Bounded a => a
minBound, feature
forall a. Bounded a => a
maxBound) ]
    termFieldScores :: [(term, [(field, Float)])]
termFieldScores =
      [ (term
t, [(field, Float)]
fieldScores)
      | term
t <- [term]
ts
      , let fieldScores :: [(field, Float)]
fieldScores =
              [ (field
f, Context term field feature
-> Doc term field feature -> term -> Float
forall field term feature.
(Ix field, Bounded field) =>
Context term field feature
-> Doc term field feature -> term -> Float
weightedTermScore Context term field feature
ctx' Doc term field feature
doc term
t)
              | field
f <- (field, field) -> [field]
forall a. Ix a => (a, a) -> [a]
range (field
forall a. Bounded a => a
minBound, field
forall a. Bounded a => a
maxBound)
              , let ctx' :: Context term field feature
ctx' = Context term field feature
ctx { fieldWeight = fieldWeightOnly f }
              ]
      ]
    fieldWeightOnly :: a -> field -> Float
fieldWeightOnly a
f field
f' | a -> field -> Bool
forall {a} {a}.
(Ix a, Ix a, Bounded a, Bounded a) =>
a -> a -> Bool
sameField a
f field
f' = Context term field feature -> field -> Float
forall term field feature.
Context term field feature -> field -> Float
fieldWeight Context term field feature
ctx field
f'
                         | Bool
otherwise      = Float
0
    sameField :: a -> a -> Bool
sameField a
f a
f' = (a, a) -> a -> Int
forall a. Ix a => (a, a) -> a -> Int
index (a
forall a. Bounded a => a
minBound, a
forall a. Bounded a => a
maxBound) a
f
                  Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== (a, a) -> a -> Int
forall a. Ix a => (a, a) -> a -> Int
index (a
forall a. Bounded a => a
minBound, a
forall a. Bounded a => a
maxBound) a
f'