module NLP.Scores
(
accuracy
, recipRank
, avgPrecision
, ari
, mi
, vi
, kullbackLeibler
, jensenShannon
, Count
, Counts
, counts
, sum
, mean
, jaccard
, entropy
, histogram
, countJoint
, countFst
, countSnd
, fstElems
, sndElems
)
where
import qualified Data.Foldable as F
import qualified Data.Traversable as T
import Data.Monoid
import Data.List hiding (sum)
import qualified Data.Set as Set
import qualified Data.Map as Map
import Prelude hiding (sum)
import NLP.Scores.Internals
accuracy :: (Eq a, Fractional c, T.Traversable t, F.Foldable s) => t a -> s a -> c
accuracy xs = mean . fmap fromEnum . zipWithTF (==) xs . F.toList
recipRank :: (Eq a, Fractional b, F.Foldable t) => a -> t a -> b
recipRank y ys =
case [ r | (r,y') <- zip [1::Int ..] . F.toList $ ys , y' == y ] of
[] -> 0
r:_ -> 1/fromIntegral r
avgPrecision :: (Fractional n, Ord a, F.Foldable t) => Set.Set a -> t a -> n
avgPrecision gold _ | Set.size gold == 0 = 0
avgPrecision gold xs =
(/fromIntegral (Set.size gold))
. sum
. map (\(r,rel,cum) -> if rel == 0
then 0
else fromIntegral cum / fromIntegral r)
. takeWhile (\(_,_,cum) -> cum <= Set.size gold)
. snd
. mapAccumL (\z (r,rel) -> (z+rel,(r,rel,z+rel))) 0
$ [ (r,fromEnum $ x `Set.member` gold)
| (x,r) <- zip (F.toList xs) [1::Int ..]]
mi :: (Ord a, Ord b) => Counts a b -> Double
mi (Counts cxy cx cy) =
let n = Map.foldl' (+) 0 cxy
cell (P x y) nxy =
let nx = cx Map.! x
ny = cy Map.! y
in nxy / n * logBase 2 (nxy * n / nx / ny)
in sum [ cell (P x y) nxy | (P x y, nxy) <- Map.toList cxy ]
vi :: (Ord a, Ord b) => Counts a b -> Double
vi cs@(Counts _ cx cy) = entropy (elems cx) + entropy (elems cy) 2 * mi cs
where elems = Map.elems
kullbackLeibler :: (Eq a, Floating a, F.Foldable f, T.Traversable t) => t a -> f a -> a
kullbackLeibler xs ys = sum . zipWithTF f xs $ ys
where f !x !y = let px = x / sx in px `mult` logBase 2 (px/(y/sy))
sx = sum xs
sy = sum ys
mult 0 _ = 0
mult w p = w * p
jensenShannon :: (Eq a, Floating a, T.Traversable t, T.Traversable u) => t a -> u a -> a
jensenShannon xs ys = 0.5 * kullbackLeibler xs zs + 0.5 * kullbackLeibler ys zs
where zs = zipWithTF (+) xs ys
ari :: (Ord a, Ord b) => Counts a b -> Double
ari (Counts cxy cx cy) = (sum1 sum2*sum3/choicen2)
/ (1/2 * (sum2+sum3) (sum2*sum3) / choicen2)
where choicen2 = choice (sum . Map.elems $ cx) 2
sum1 = sum [ choice nij 2 | nij <- Map.elems cxy ]
sum2 = sum [ choice ni 2 | ni <- Map.elems cx ]
sum3 = sum [ choice nj 2 | nj <- Map.elems cy ]
sum :: (F.Foldable t, Num a) => t a -> a
sum = F.foldl' (+) 0
mean :: (F.Foldable t, Fractional n, Real a) => t a -> n
mean xs =
let (P tot len) = F.foldl' (\(P s l) x -> (P (s+x) (l+1))) (P 0 0) xs
in realToFrac tot/len
choice :: (Enum b, Fractional b) => b -> b -> b
choice n k = foldl' (*) 1 [nk+1 .. n] / foldl' (*) 1 [1 .. k]
jaccard :: (Fractional n, Ord a) => Set.Set a -> Set.Set a -> n
jaccard a b =
fromIntegral (Set.size (Set.intersection a b))
/
fromIntegral (Set.size (Set.union a b))
entropy :: (Floating c, F.Foldable t) => t c -> c
entropy cx = negate . getSum . F.foldMap (Sum . f) $ cx
where n = sum cx
logn = logBase 2 n
f nx = nx / n * (logBase 2 nx logn)
histogram :: (Num a, Ord k, F.Foldable t) => t k -> Map.Map k a
histogram = F.foldl' (\ z k -> Map.insertWith' (+) k 1 z) Map.empty
counts :: (Ord a, Ord b, T.Traversable t, F.Foldable s) => t a -> s b -> Counts a b
counts xs = F.foldl' f empty . zipWithTF P xs . F.toList
where f cs@(Counts cxy cx cy) p@(P x y) =
cs { joint = Map.insertWith' (+) p 1 cxy
, marginalFst = Map.insertWith' (+) x 1 cx
, marginalSnd = Map.insertWith' (+) y 1 cy }
countJoint :: (Ord a, Ord b) => a -> b -> Counts a b -> Count
countJoint x y = Map.findWithDefault 0 (P x y) . joint
countFst :: Ord k => k -> Counts k b -> Count
countFst x = Map.findWithDefault 0 x . marginalFst
countSnd :: Ord k => k -> Counts a k -> Count
countSnd y = Map.findWithDefault 0 y . marginalSnd
fstElems :: Counts k b -> [k]
fstElems = Map.keys . marginalFst
sndElems :: Counts a k -> [k]
sndElems = Map.keys . marginalSnd
zipWithTF :: (T.Traversable t, F.Foldable f) =>
(a -> b -> c) -> t a -> f b -> t c
zipWithTF h t f = snd . T.mapAccumL map_one (F.toList f) $ t
where map_one (x:xs) y = (xs, h y x)