-- Calculate various characteristics on sequence quality
module Metrics where
import Bio.Core
import Bio.Sequence.SFF
import qualified Data.ByteString.Lazy.Char8 as B
-- import Test.QuickCheck
-- | Take the fractional parts of the flows, and sum their squares (the "K²" metric)
quals :: [Flow] -> Flow
quals q = floor $ ((100::Double) - 2*(sqrt $ (/fromIntegral (length q)) $ sum $ map (fromIntegral . (^(2::Integer)) . (flip (-) 50) . (`mod` 100) . (+50)) $ q))
-- | Count number of n's in the sequence
-- The algorithm for generating Ns is a bit opaque, and appears to depend on the magnitude
-- of the noise flow values. We chicken out, and just count the called sequence.
n_count :: ReadBlock -> Int
n_count r = length . filter isN . clip . B.unpack . unSD . bases $ r
where isN x = x=='N' || x == 'n'
clip = take (right-left+1) . drop left
right = fromIntegral $ clip_qual_right (read_header r)
left = fromIntegral $ clip_qual_left (read_header r)