{-# LANGUAGE BangPatterns #-}
module NLP.Scores 
    ( 
      sum
    , mean
    , accuracy
    , recipRank
    , avgPrecision
    , jaccard
    )
where
import Data.List hiding (sum)
import qualified Data.Set as Set
import Prelude hiding (sum)

-- | The sum of a list of numbers (without overflowing stack, 
-- unlike 'Prelude.sum').
sum :: (Num a) => [a] -> a
sum = foldl' (+) 0

-- | The mean of a list of numbers.
mean :: (Fractional n, Real a) => [a] -> n
mean xs = 
    let (sum,len) = foldl' (\(!s,!l) x -> (s+x,l+1)) (0,0) xs
    in realToFrac sum/len

-- | Accuracy: the proportion of elements in the first list equal to 
-- elements at corresponding positions in second list. Lists should be
-- of equal lengths.
accuracy :: (Eq a, Fractional n) => [a] -> [a] -> n
accuracy xs = mean . map fromEnum . zipWith (==) xs
 
-- | Reciprocal rank: the reciprocal of the rank at which the first arguments
-- occurs in the list given as the second argument.
recipRank :: (Eq a, Fractional n) => a -> [a] -> n
recipRank y ys = 
    case [ r | (r,y') <- zip [1..] ys , y' == y ] of
      []  -> 0
      r:_ -> 1/fromIntegral r

-- | Average precision. 
-- <http://en.wikipedia.org/wiki/Information_retrieval#Average_precision>
avgPrecision :: (Fractional n, Ord a) => Set.Set a -> [a] -> n
avgPrecision gold _ | Set.size gold == 0 = 0
avgPrecision gold xs =
      (/fromIntegral (Set.size gold))
    . sum 
    . map (\(r,rel,cum) -> if rel == 0 
                          then 0 
                          else fromIntegral cum / fromIntegral r)
    . takeWhile (\(_,_,cum) -> cum <= Set.size gold) 
    . snd 
    . mapAccumL (\z (r,rel) -> (z+rel,(r,rel,z+rel))) 0
    $ [ (r,fromEnum $ x `Set.member` gold) | (x,r) <- zip xs [1..]]

-- | Jaccard coefficient
-- J(A,B) = |AB| / |A union B|
jaccard :: (Fractional n, Ord a) => Set.Set a -> Set.Set a -> n
jaccard a b = 
  fromIntegral (Set.size (Set.intersection a b))
  / 
  fromIntegral (Set.size (Set.union a b))