module Bio.SamTools.Classify where
import Bio.SamTools.Bam
import Data.List (foldl', intercalate)
import Data.Maybe (isNothing, fromJust)
import Text.Printf (printf)
data Stats a = Class { total :: !Int, innies, outies, lefties, righties :: !a } deriving Show
display :: Insertable x => Stats x -> String
display c = unlines $ map (intercalate "\t")
[ "Alignment" : dispheader (innies c)
, "innies " : disp1 t (innies c)
, "outies " : disp1 t (outies c)
, "lefties " : disp1 t (lefties c)
, "righties " : disp1 t (righties c)
, ["Total reads: ", show t]]
where t = total c
showQuants :: Stats Hist -> String
showQuants c = unlines $ map (intercalate "\t")
[ "Alignment" : header (innies c)
, "innies " : quants t (innies c)
, "outies " : quants t (outies c)
, "lefties " : quants t (lefties c)
, "righties " : quants t (righties c)
, ["Total reads: ", show t]]
where t = total c
percentiles = [0.05,0.25,0.5,0.75,0.95] :: [Double]
quants tot h = printf "%14d" (hcount h)
: printf "%5.2f%%" (200*fromIntegral (hcount h)/fromIntegral tot::Double)
: if hcount h == 0 then map (const " N/A") percentiles
else go (map (round . (*fromIntegral (hcount h))) percentiles) 0 (0,0) (buckets h)
header _h = " count":" prop":map (printf "%6.0f%%" . (*100)) percentiles
go :: [Int] -> Int -> (Int,Int) -> [(Int,Int)] -> [String]
go (0:fs) sum prev inp = " 0" : go fs sum prev inp
go (f:fs) sum prev ((b,c):rest) =
let next = sum+c in
if next >= f
then let b' = b round (fromIntegral (nextf)/fromIntegral c*fromIntegral (bfst prev))
in printf "%7d" b' : (go fs sum prev ((b,c):rest))
else go (f:fs) next (b,c) rest
go [] _ _ _ = []
go _ _ _ [] = error "ran out of reads!?"
class Insertable x where
insert :: Bam1 -> x -> x
disp1 :: Int -> x -> [String]
dispheader :: x -> [String]
classify :: Insertable x => x -> [Bam1] -> Stats x
classify def = foldl' (class1 . bump) (Class 0 def def def def)
bump :: Stats a -> Stats a
bump s = s { total = total s + 1 }
class1 :: Insertable x => Stats x -> Bam1 -> Stats x
class1 c0 b
| isUnmapped b = c0
| isOpposite b =
(if firstUpstream b then add_innie else add_outie) b c0
| otherwise =
(if firstUpstream b then add_rightie else add_leftie) b c0
add_innie, add_outie, add_rightie, add_leftie :: Insertable x => Bam1 -> Stats x -> Stats x
add_innie b c = c { innies = insert b (innies c) }
add_outie b c = c { outies = insert b (outies c) }
add_rightie b c = c { righties = insert b (righties c) }
add_leftie b c = c { lefties = insert b (lefties c) }
isUnmapped, isOpposite, firstUpstream, isBefore :: Bam1 -> Bool
isUnmapped = isNothing . insertSize
isOpposite b = isReverse b /= isMateReverse b
firstUpstream b = isReverse b /= isBefore b
isBefore b = position b < matePosition b
data ClassStats = CS { ccount :: !Int, xsum, x2sum, x3sum, x4sum :: !Double }
deriving Show
data Statistics = S { ncount, mean, stdev, skew, kurt :: Double }
mkstats :: ClassStats -> Statistics
mkstats cs = S n m s sk kt
where
n = fromIntegral (ccount cs)
x = xsum cs
x2 = x2sum cs
x3 = x3sum cs
x4 = x4sum cs
m = x/n
m2 = m*m
m3 = m*m2
s = sqrt ((x2m2*n)/(n1))
sk = (x3 3*m*x2 + 2*m3*n)/(s*s*s*n)
kt = (x4 4*m*x3 + 6*m2*x2 4*m3*x + n*m*m3)/(s*s*s*s*n) 3
statistics :: Stats ClassStats -> Stats Statistics
statistics (Class t i o l r) = (Class t (mkstats i) (mkstats o) (mkstats l) (mkstats r))
instance Insertable ClassStats where
insert b (CS c s s2 s3 s4) = CS (c+1) (s+d) (s2+d^(2::Int)) (s3+d^(3::Int)) (s4+d^(4::Int))
where d = maybe (error ("no insert size?\n"++show b)) fromIntegral $ insertSize b
dispheader _ = [" count"," prop"," mean"," stdev"," skew"," kurt"]
disp1 tot cs = printf "%14d" (ccount cs)
: printf "%5.2f%%" (200*fromIntegral (ccount cs)/fromIntegral tot::Double)
: map (printf "%7.1f") [mean s, stdev s, skew s, kurt s]
where
s = mkstats cs
data Hist = H { hcount :: !Int, buckets :: ![(Int,Int)] } deriving Show
instance Insertable Hist where
insert b h = H (hcount h + 1) (go (fromIntegral $ fromJust $ insertSize b) (buckets h))
where go x ((b1,v):bs@(_:_)) =
if x > b1 then (b1,v):go x bs else inc (b1,v):bs
go x [(b1,v)] = [inc (b1,v)]
inc (x,y) = let y' = y+1 in y' `seq` (x,y')
disp1 tot h = printf "%14d" (hcount h)
: printf "%5.2f%%" (200*fromIntegral (hcount h)/fromIntegral tot::Double)
: map (printf "%7d" . snd) (buckets h)
dispheader h = " count":" prop":(map (printf "%7d" . fst) $ init $ buckets h)++[" >"]
data Collect = Bams { bams :: [Bam1] }
instance Insertable Collect where
insert b (Bams bs) = Bams (b:bs)
disp1 _ (Bams bs) = [unlines $ ("":map show bs)]
dispheader = const ["foo"]