module Bio.Sequence.Entropy (KWords(..), entropy) where

import Data.List

-- | Sequence types that can be broken up into words.
class KWords s where
   -- | @kwords k s@ produces a list of words drawn from the sequence @s@,
   -- each of the same type as @s@.  The intent appears to be "all words of
   -- length @k@" (that is what the list instance in this module yields for
   -- positive @k@); the ordering of the result is left to the instance.
   kwords :: Int -> s -> [s]

-- | Lists: every contiguous run of @k@ elements, with the run that starts
-- latest in the list coming first.
instance KWords [a] where
   -- Take the k-prefix of every suffix.  After reversing, the prefixes that
   -- came out shorter than k (those taken near the end of the list) sit at
   -- the front, where they are dropped.
   kwords k xs = dropWhile tooShort (reverse prefixes)
      where
         prefixes   = [take k suffix | suffix <- tails xs]
         tooShort w = length w < k

-- | Shannon entropy, in bits, of the distribution of k-words in a sequence.
--
-- The words produced by @'kwords' k@ are sorted and grouped, the size of each
-- group is normalised to a relative frequency @p@, and the result is the sum
-- of @-p * log2 p@ over the distinct words.
--
-- This is the entropy of the k-words in the sequence, which is NOT the same
-- as the kth-order entropy.
--
-- The implementation is naive (sort, then group); a Map of word counts could
-- be used instead, but this is possibly sufficient.
--
-- When 'kwords' yields no words at all, or only one distinct word, the
-- result is @0.0@.
entropy :: (Ord str, KWords str) => Int -> str -> Double
entropy k s = sum . map (negate . nlogn) $ probs counts
    where
      -- Each term is negated before summing, instead of negating the finished
      -- sum: negating a total of 0.0 would give IEEE negative zero, which
      -- shows up as "-0.0".  For non-zero totals the value is unchanged.
      counts = map (fromIntegral . length) . group . sort . kwords k $ s

-- | @x * log2 x@, computed with natural logarithms as @(x * ln x) / ln 2@.
-- There is no special case for @x = 0@.
nlogn :: (Floating a) => a -> a
nlogn x = (x * log x) / ln2
    where ln2 = log 2
-- | Rescale a list of weights so that each one is expressed as a fraction of
-- the list's total.  An empty list is returned unchanged.
probs :: (Fractional a) => [a] -> [a]
probs ls = [weight / total | weight <- ls]
    where total = sum ls