-- | Things specific to ancient DNA, e.g. damage models.
--
-- For aDNA, we need a substitution probability.  We have three options:
-- use an empirically determined PSSM, use an arithmetically defined
-- PSSM based on the /Johnson/ model, use a context sensitive PSSM based
-- on the /Johnson/ model and an alignment.  Using /Dindel/, actual
-- substitutions relative to a called haplotype would be taken into
-- account.  Since we're not going to do that, taking alignments into
-- account is difficult, somewhat approximate, and therefore not worth
-- the hassle.

module Bio.Adna (
    DmgStats(..),
    CompositionStats,
    SubstitutionStats,
    addFragType,
    damagePatternsIter,
    damagePatternsIterMD,
    damagePatternsIter2Bit,
    alnFromMd,

    DamageParameters(..),
    NewDamageParameters(..),
    GenDamageParameters(..),
    DamageModel,
    bang, nudge,
    Alignment(..),
    FragType(..),
    Subst(..),

    NPair,
    npair,
    fst_np,
    snd_np,

    noDamage,
    univDamage,
    empDamage,
    Mat44D(..),
    MMat44D(..),
    scalarMat,
    complMat,
    freezeMats,

    bwa_cal_maxdiff
  ) where

import Bio.Bam
import Bio.Prelude
import Bio.TwoBit

import qualified Data.Vector                    as V
import qualified Data.Vector.Generic            as G
import qualified Data.Vector.Storable           as VS
import qualified Data.Vector.Unboxed            as U
import qualified Data.Vector.Unboxed.Mutable    as UM
import qualified Streaming.Prelude              as Q

-- | We represent substitution matrices by the type 'Mat44D'.  Internally,
-- this is a vector of packed vectors.  Conveniently, each of the packed
-- vectors represents all transitions /into/ the given nucleotide.

newtype Mat44D = Mat44D (U.Vector Double) deriving (Int -> Mat44D -> ShowS
[Mat44D] -> ShowS
Mat44D -> String
(Int -> Mat44D -> ShowS)
-> (Mat44D -> String) -> ([Mat44D] -> ShowS) -> Show Mat44D
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Mat44D] -> ShowS
$cshowList :: [Mat44D] -> ShowS
show :: Mat44D -> String
$cshow :: Mat44D -> String
showsPrec :: Int -> Mat44D -> ShowS
$cshowsPrec :: Int -> Mat44D -> ShowS
Show, (forall x. Mat44D -> Rep Mat44D x)
-> (forall x. Rep Mat44D x -> Mat44D) -> Generic Mat44D
forall x. Rep Mat44D x -> Mat44D
forall x. Mat44D -> Rep Mat44D x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Mat44D x -> Mat44D
$cfrom :: forall x. Mat44D -> Rep Mat44D x
Generic)
newtype MMat44D = MMat44D (UM.IOVector Double)

-- | A 'DamageModel' is a function that gives substitution matrices for
-- each position in a read.  The 'DamageModel' can depend on whether the
-- alignment is reversed, the length of the read and the position.  (In
-- practice, we should probably memoize precomputed damage models
-- somehow.)

type DamageModel = Bool -> Int -> Int -> Mat44D
data Subst = Nucleotide :-> Nucleotide deriving (Subst -> Subst -> Bool
(Subst -> Subst -> Bool) -> (Subst -> Subst -> Bool) -> Eq Subst
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Subst -> Subst -> Bool
$c/= :: Subst -> Subst -> Bool
== :: Subst -> Subst -> Bool
$c== :: Subst -> Subst -> Bool
Eq, Eq Subst
Eq Subst =>
(Subst -> Subst -> Ordering)
-> (Subst -> Subst -> Bool)
-> (Subst -> Subst -> Bool)
-> (Subst -> Subst -> Bool)
-> (Subst -> Subst -> Bool)
-> (Subst -> Subst -> Subst)
-> (Subst -> Subst -> Subst)
-> Ord Subst
Subst -> Subst -> Bool
Subst -> Subst -> Ordering
Subst -> Subst -> Subst
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Subst -> Subst -> Subst
$cmin :: Subst -> Subst -> Subst
max :: Subst -> Subst -> Subst
$cmax :: Subst -> Subst -> Subst
>= :: Subst -> Subst -> Bool
$c>= :: Subst -> Subst -> Bool
> :: Subst -> Subst -> Bool
$c> :: Subst -> Subst -> Bool
<= :: Subst -> Subst -> Bool
$c<= :: Subst -> Subst -> Bool
< :: Subst -> Subst -> Bool
$c< :: Subst -> Subst -> Bool
compare :: Subst -> Subst -> Ordering
$ccompare :: Subst -> Subst -> Ordering
$cp1Ord :: Eq Subst
Ord, Ord Subst
Ord Subst =>
((Subst, Subst) -> [Subst])
-> ((Subst, Subst) -> Subst -> Int)
-> ((Subst, Subst) -> Subst -> Int)
-> ((Subst, Subst) -> Subst -> Bool)
-> ((Subst, Subst) -> Int)
-> ((Subst, Subst) -> Int)
-> Ix Subst
(Subst, Subst) -> Int
(Subst, Subst) -> [Subst]
(Subst, Subst) -> Subst -> Bool
(Subst, Subst) -> Subst -> Int
forall a.
Ord a =>
((a, a) -> [a])
-> ((a, a) -> a -> Int)
-> ((a, a) -> a -> Int)
-> ((a, a) -> a -> Bool)
-> ((a, a) -> Int)
-> ((a, a) -> Int)
-> Ix a
unsafeRangeSize :: (Subst, Subst) -> Int
$cunsafeRangeSize :: (Subst, Subst) -> Int
rangeSize :: (Subst, Subst) -> Int
$crangeSize :: (Subst, Subst) -> Int
inRange :: (Subst, Subst) -> Subst -> Bool
$cinRange :: (Subst, Subst) -> Subst -> Bool
unsafeIndex :: (Subst, Subst) -> Subst -> Int
$cunsafeIndex :: (Subst, Subst) -> Subst -> Int
index :: (Subst, Subst) -> Subst -> Int
$cindex :: (Subst, Subst) -> Subst -> Int
range :: (Subst, Subst) -> [Subst]
$crange :: (Subst, Subst) -> [Subst]
$cp1Ix :: Ord Subst
Ix, Int -> Subst -> ShowS
[Subst] -> ShowS
Subst -> String
(Int -> Subst -> ShowS)
-> (Subst -> String) -> ([Subst] -> ShowS) -> Show Subst
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Subst] -> ShowS
$cshowList :: [Subst] -> ShowS
show :: Subst -> String
$cshow :: Subst -> String
showsPrec :: Int -> Subst -> ShowS
$cshowsPrec :: Int -> Subst -> ShowS
Show)

infix 9 :->
infix 8 `bang`

-- | Convenience function to access a substitution matrix that has a
-- mnemonic reading.
{-# INLINE bang #-}
bang :: Mat44D -> Subst -> Double
bang :: Mat44D -> Subst -> Double
bang (Mat44D v :: Vector Double
v) (N x :: Word8
x :-> N y :: Word8
y)
    | Vector Double -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Double
v Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 16 = Vector Double
v Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
y)
    | Bool
otherwise = String -> Double
forall a. HasCallStack => String -> a
error (String -> Double) -> String -> Double
forall a b. (a -> b) -> a -> b
$ "Huh? " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Vector Double -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Double
v)

{-# INLINE nudge #-}
nudge :: MMat44D -> Subst -> Double -> IO ()
nudge :: MMat44D -> Subst -> Double -> IO ()
nudge (MMat44D v :: IOVector Double
v) (N x :: Word8
x :-> N y :: Word8
y) a :: Double
a = MVector (PrimState IO) Double -> Int -> IO Double
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
UM.read IOVector Double
MVector (PrimState IO) Double
v Int
i IO Double -> (Double -> IO ()) -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MVector (PrimState IO) Double -> Int -> Double -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UM.write IOVector Double
MVector (PrimState IO) Double
v Int
i (Double -> IO ()) -> (Double -> Double) -> Double -> IO ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) Double
a
  where i :: Int
i = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
x Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
y

scalarMat :: Double -> Mat44D
scalarMat :: Double -> Mat44D
scalarMat s :: Double
s = Vector Double -> Mat44D
Mat44D (Vector Double -> Mat44D) -> Vector Double -> Mat44D
forall a b. (a -> b) -> a -> b
$ Int -> [Double] -> Vector Double
forall a. Unbox a => Int -> [a] -> Vector a
U.fromListN 16 [ Double
s, 0, 0, 0
                                      , 0, Double
s, 0, 0
                                      , 0, 0, Double
s, 0
                                      , 0, 0, 0, Double
s ]
complMat :: Mat44D -> Mat44D
complMat :: Mat44D -> Mat44D
complMat v :: Mat44D
v = Vector Double -> Mat44D
Mat44D (Vector Double -> Mat44D) -> Vector Double -> Mat44D
forall a b. (a -> b) -> a -> b
$ Int -> [Double] -> Vector Double
forall a. Unbox a => Int -> [a] -> Vector a
U.fromListN 16 [ Mat44D
v Mat44D -> Subst -> Double
`bang` Nucleotide -> Nucleotide
compl Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide -> Nucleotide
compl Nucleotide
y
                                     | Nucleotide
y <- (Nucleotide, Nucleotide) -> [Nucleotide]
forall a. Ix a => (a, a) -> [a]
range (Nucleotide
nucA, Nucleotide
nucT)
                                     , Nucleotide
x <- (Nucleotide, Nucleotide) -> [Nucleotide]
forall a. Ix a => (a, a) -> [a]
range (Nucleotide
nucA, Nucleotide
nucT) ]

-- | Adds the two matrices of a mutable substitution model (one for each
-- strand) appropriately, normalizes the result (to make probabilities
-- from pseudo-counts), and freezes that into one immutable matrix.  We
-- add a single count everywhere to avoid getting NaN from bizarre data.
freezeMats :: MMat44D -> MMat44D -> IO Mat44D
freezeMats :: MMat44D -> MMat44D -> IO Mat44D
freezeMats (MMat44D vv :: IOVector Double
vv) (MMat44D ww :: IOVector Double
ww) = do
    Mat44D
v <-            Vector Double -> Mat44D
Mat44D (Vector Double -> Mat44D) -> IO (Vector Double) -> IO Mat44D
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) Double -> IO (Vector Double)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.freeze IOVector Double
MVector (PrimState IO) Double
vv
    Mat44D
w <- Mat44D -> Mat44D
complMat (Mat44D -> Mat44D)
-> (Vector Double -> Mat44D) -> Vector Double -> Mat44D
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Double -> Mat44D
Mat44D (Vector Double -> Mat44D) -> IO (Vector Double) -> IO Mat44D
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) Double -> IO (Vector Double)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.freeze IOVector Double
MVector (PrimState IO) Double
ww

    let sums :: Vector Double
sums = Int -> (Int -> Double) -> Vector Double
forall a. Unbox a => Int -> (Int -> a) -> Vector a
U.generate 4 ((Int -> Double) -> Vector Double)
-> (Int -> Double) -> Vector Double
forall a b. (a -> b) -> a -> b
$ \x0 :: Int
x0 ->
                    let x :: Nucleotide
x = Word8 -> Nucleotide
N (Word8 -> Nucleotide) -> Word8 -> Nucleotide
forall a b. (a -> b) -> a -> b
$ Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x0
                    in [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ Mat44D
v Mat44D -> Subst -> Double
`bang` Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
z Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Mat44D
w Mat44D -> Subst -> Double
`bang` Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
z
                           | Nucleotide
z <- (Nucleotide, Nucleotide) -> [Nucleotide]
forall a. Ix a => (a, a) -> [a]
range (Nucleotide
nucA, Nucleotide
nucT) ] Double -> Double -> Double
forall a. Num a => a -> a -> a
+ 4

    Mat44D -> IO Mat44D
forall (m :: * -> *) a. Monad m => a -> m a
return (Mat44D -> IO Mat44D)
-> (Vector Double -> Mat44D) -> Vector Double -> IO Mat44D
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Vector Double -> Mat44D
Mat44D (Vector Double -> IO Mat44D) -> Vector Double -> IO Mat44D
forall a b. (a -> b) -> a -> b
$ Int -> [Double] -> Vector Double
forall a. Unbox a => Int -> [a] -> Vector a
U.fromListN 16
            [ (Mat44D
v Mat44D -> Subst -> Double
`bang` Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
y Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Mat44D
w Mat44D -> Subst -> Double
`bang` Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
y Double -> Double -> Double
forall a. Num a => a -> a -> a
+ 1) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
s
            | Nucleotide
y <- (Nucleotide, Nucleotide) -> [Nucleotide]
forall a. Ix a => (a, a) -> [a]
range (Nucleotide
nucA, Nucleotide
nucT)
            , Nucleotide
x <- (Nucleotide, Nucleotide) -> [Nucleotide]
forall a. Ix a => (a, a) -> [a]
range (Nucleotide
nucA, Nucleotide
nucT)
            , let s :: Double
s = Vector Double
sums Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Nucleotide -> Word8
unN Nucleotide
x) ]


-- | 'DamageModel' for undamaged DNA.  The likelihoods follow directly
-- from the quality score.  This needs elaboration to see what to do
-- with amibiguity codes (even though those haven't actually been
-- observed in the wild).
noDamage :: DamageModel
noDamage :: DamageModel
noDamage _ _ _ = Mat44D
one where !one :: Mat44D
one = Double -> Mat44D
scalarMat 1


-- | Parameters for the universal damage model.
--
-- We assume the correct model is either no damage, or single strand
-- damage, or double strand damage.  Each of them comes with a
-- probability.  It turns out that blending them into one is simply
-- accomplished by multiplying these probabilities onto the deamination
-- probabilities.
--
-- For single stranded library prep, only one kind of damage occurs (C
-- frequency ('ssd_sigma') in single stranded parts, and the overhang
-- length is distributed exponentially with parameter 'ssd_lambda' at
-- the 5' end and 'ssd_kappa' at the 3' end.  (Without UDG treatment,
-- those will be equal.  With UDG, those are much smaller and in fact
-- don't literally represent overhangs.)
--
-- For double stranded library prep, we get C->T damage at the 5' end
-- and G->A at the 3' end with rate 'dsd_sigma' and both in the interior
-- with rate 'dsd_delta'.  Everything is symmetric, and therefore the
-- orientation of the aligned read doesn't matter either.  Both
-- overhangs follow a distribution with parameter 'dsd_lambda'.

data DamageParameters float = DP { DamageParameters float -> float
ssd_sigma  :: !float         -- deamination rate in ss DNA, SS model
                                 , DamageParameters float -> float
ssd_delta  :: !float         -- deamination rate in ds DNA, SS model
                                 , DamageParameters float -> float
ssd_lambda :: !float         -- param for geom. distribution, 5' end, SS model
                                 , DamageParameters float -> float
ssd_kappa  :: !float         -- param for geom. distribution, 3' end, SS model
                                 , DamageParameters float -> float
dsd_sigma  :: !float         -- deamination rate in ss DNA, DS model
                                 , DamageParameters float -> float
dsd_delta  :: !float         -- deamination rate in ds DNA, DS model
                                 , DamageParameters float -> float
dsd_lambda :: !float }       -- param for geom. distribution, DS model
  deriving (ReadPrec [DamageParameters float]
ReadPrec (DamageParameters float)
Int -> ReadS (DamageParameters float)
ReadS [DamageParameters float]
(Int -> ReadS (DamageParameters float))
-> ReadS [DamageParameters float]
-> ReadPrec (DamageParameters float)
-> ReadPrec [DamageParameters float]
-> Read (DamageParameters float)
forall float. Read float => ReadPrec [DamageParameters float]
forall float. Read float => ReadPrec (DamageParameters float)
forall float. Read float => Int -> ReadS (DamageParameters float)
forall float. Read float => ReadS [DamageParameters float]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [DamageParameters float]
$creadListPrec :: forall float. Read float => ReadPrec [DamageParameters float]
readPrec :: ReadPrec (DamageParameters float)
$creadPrec :: forall float. Read float => ReadPrec (DamageParameters float)
readList :: ReadS [DamageParameters float]
$creadList :: forall float. Read float => ReadS [DamageParameters float]
readsPrec :: Int -> ReadS (DamageParameters float)
$creadsPrec :: forall float. Read float => Int -> ReadS (DamageParameters float)
Read, Int -> DamageParameters float -> ShowS
[DamageParameters float] -> ShowS
DamageParameters float -> String
(Int -> DamageParameters float -> ShowS)
-> (DamageParameters float -> String)
-> ([DamageParameters float] -> ShowS)
-> Show (DamageParameters float)
forall float. Show float => Int -> DamageParameters float -> ShowS
forall float. Show float => [DamageParameters float] -> ShowS
forall float. Show float => DamageParameters float -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DamageParameters float] -> ShowS
$cshowList :: forall float. Show float => [DamageParameters float] -> ShowS
show :: DamageParameters float -> String
$cshow :: forall float. Show float => DamageParameters float -> String
showsPrec :: Int -> DamageParameters float -> ShowS
$cshowsPrec :: forall float. Show float => Int -> DamageParameters float -> ShowS
Show, (forall x.
 DamageParameters float -> Rep (DamageParameters float) x)
-> (forall x.
    Rep (DamageParameters float) x -> DamageParameters float)
-> Generic (DamageParameters float)
forall x. Rep (DamageParameters float) x -> DamageParameters float
forall x. DamageParameters float -> Rep (DamageParameters float) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall float x.
Rep (DamageParameters float) x -> DamageParameters float
forall float x.
DamageParameters float -> Rep (DamageParameters float) x
$cto :: forall float x.
Rep (DamageParameters float) x -> DamageParameters float
$cfrom :: forall float x.
DamageParameters float -> Rep (DamageParameters float) x
Generic)

data NewDamageParameters vec float = NDP { NewDamageParameters vec float -> float
dp_gc_frac :: !float
                                         , NewDamageParameters vec float -> float
dp_mu      :: !float
                                         , NewDamageParameters vec float -> float
dp_nu      :: !float
                                         , NewDamageParameters vec float -> vec float
dp_alpha5  :: !(vec float)
                                         , NewDamageParameters vec float -> vec float
dp_beta5   :: !(vec float)
                                         , NewDamageParameters vec float -> float
dp_alpha   :: !float
                                         , NewDamageParameters vec float -> float
dp_beta    :: !float
                                         , NewDamageParameters vec float -> vec float
dp_alpha3  :: !(vec float)
                                         , NewDamageParameters vec float -> vec float
dp_beta3   :: !(vec float) }
  deriving (ReadPrec [NewDamageParameters vec float]
ReadPrec (NewDamageParameters vec float)
Int -> ReadS (NewDamageParameters vec float)
ReadS [NewDamageParameters vec float]
(Int -> ReadS (NewDamageParameters vec float))
-> ReadS [NewDamageParameters vec float]
-> ReadPrec (NewDamageParameters vec float)
-> ReadPrec [NewDamageParameters vec float]
-> Read (NewDamageParameters vec float)
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec [NewDamageParameters vec float]
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec (NewDamageParameters vec float)
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
Int -> ReadS (NewDamageParameters vec float)
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadS [NewDamageParameters vec float]
readListPrec :: ReadPrec [NewDamageParameters vec float]
$creadListPrec :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec [NewDamageParameters vec float]
readPrec :: ReadPrec (NewDamageParameters vec float)
$creadPrec :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec (NewDamageParameters vec float)
readList :: ReadS [NewDamageParameters vec float]
$creadList :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadS [NewDamageParameters vec float]
readsPrec :: Int -> ReadS (NewDamageParameters vec float)
$creadsPrec :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
Int -> ReadS (NewDamageParameters vec float)
Read, Int -> NewDamageParameters vec float -> ShowS
[NewDamageParameters vec float] -> ShowS
NewDamageParameters vec float -> String
(Int -> NewDamageParameters vec float -> ShowS)
-> (NewDamageParameters vec float -> String)
-> ([NewDamageParameters vec float] -> ShowS)
-> Show (NewDamageParameters vec float)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
Int -> NewDamageParameters vec float -> ShowS
forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
[NewDamageParameters vec float] -> ShowS
forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
NewDamageParameters vec float -> String
showList :: [NewDamageParameters vec float] -> ShowS
$cshowList :: forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
[NewDamageParameters vec float] -> ShowS
show :: NewDamageParameters vec float -> String
$cshow :: forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
NewDamageParameters vec float -> String
showsPrec :: Int -> NewDamageParameters vec float -> ShowS
$cshowsPrec :: forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
Int -> NewDamageParameters vec float -> ShowS
Show, (forall x.
 NewDamageParameters vec float
 -> Rep (NewDamageParameters vec float) x)
-> (forall x.
    Rep (NewDamageParameters vec float) x
    -> NewDamageParameters vec float)
-> Generic (NewDamageParameters vec float)
forall x.
Rep (NewDamageParameters vec float) x
-> NewDamageParameters vec float
forall x.
NewDamageParameters vec float
-> Rep (NewDamageParameters vec float) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall (vec :: * -> *) float x.
Rep (NewDamageParameters vec float) x
-> NewDamageParameters vec float
forall (vec :: * -> *) float x.
NewDamageParameters vec float
-> Rep (NewDamageParameters vec float) x
$cto :: forall (vec :: * -> *) float x.
Rep (NewDamageParameters vec float) x
-> NewDamageParameters vec float
$cfrom :: forall (vec :: * -> *) float x.
NewDamageParameters vec float
-> Rep (NewDamageParameters vec float) x
Generic)

data GenDamageParameters vec float
    = UnknownDamage
    | OldDamage (DamageParameters float)
    | NewDamage (NewDamageParameters vec float)
  deriving (Int -> GenDamageParameters vec float -> ShowS
[GenDamageParameters vec float] -> ShowS
GenDamageParameters vec float -> String
(Int -> GenDamageParameters vec float -> ShowS)
-> (GenDamageParameters vec float -> String)
-> ([GenDamageParameters vec float] -> ShowS)
-> Show (GenDamageParameters vec float)
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
Int -> GenDamageParameters vec float -> ShowS
forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
[GenDamageParameters vec float] -> ShowS
forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
GenDamageParameters vec float -> String
showList :: [GenDamageParameters vec float] -> ShowS
$cshowList :: forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
[GenDamageParameters vec float] -> ShowS
show :: GenDamageParameters vec float -> String
$cshow :: forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
GenDamageParameters vec float -> String
showsPrec :: Int -> GenDamageParameters vec float -> ShowS
$cshowsPrec :: forall (vec :: * -> *) float.
(Show float, Show (vec float)) =>
Int -> GenDamageParameters vec float -> ShowS
Show, (forall x.
 GenDamageParameters vec float
 -> Rep (GenDamageParameters vec float) x)
-> (forall x.
    Rep (GenDamageParameters vec float) x
    -> GenDamageParameters vec float)
-> Generic (GenDamageParameters vec float)
forall x.
Rep (GenDamageParameters vec float) x
-> GenDamageParameters vec float
forall x.
GenDamageParameters vec float
-> Rep (GenDamageParameters vec float) x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
forall (vec :: * -> *) float x.
Rep (GenDamageParameters vec float) x
-> GenDamageParameters vec float
forall (vec :: * -> *) float x.
GenDamageParameters vec float
-> Rep (GenDamageParameters vec float) x
$cto :: forall (vec :: * -> *) float x.
Rep (GenDamageParameters vec float) x
-> GenDamageParameters vec float
$cfrom :: forall (vec :: * -> *) float x.
GenDamageParameters vec float
-> Rep (GenDamageParameters vec float) x
Generic, ReadPrec [GenDamageParameters vec float]
ReadPrec (GenDamageParameters vec float)
Int -> ReadS (GenDamageParameters vec float)
ReadS [GenDamageParameters vec float]
(Int -> ReadS (GenDamageParameters vec float))
-> ReadS [GenDamageParameters vec float]
-> ReadPrec (GenDamageParameters vec float)
-> ReadPrec [GenDamageParameters vec float]
-> Read (GenDamageParameters vec float)
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec [GenDamageParameters vec float]
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec (GenDamageParameters vec float)
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
Int -> ReadS (GenDamageParameters vec float)
forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadS [GenDamageParameters vec float]
readListPrec :: ReadPrec [GenDamageParameters vec float]
$creadListPrec :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec [GenDamageParameters vec float]
readPrec :: ReadPrec (GenDamageParameters vec float)
$creadPrec :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadPrec (GenDamageParameters vec float)
readList :: ReadS [GenDamageParameters vec float]
$creadList :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
ReadS [GenDamageParameters vec float]
readsPrec :: Int -> ReadS (GenDamageParameters vec float)
$creadsPrec :: forall (vec :: * -> *) float.
(Read float, Read (vec float)) =>
Int -> ReadS (GenDamageParameters vec float)
Read)



-- | Generic substitution matrix, has C->T and G->A deamination as
-- parameters.  Setting 'p' or 'q' to 0 as appropriate makes this apply
-- to the single stranded or undamaged case.

{-# INLINE genSubstMat #-}
genSubstMat :: Double -> Double -> Mat44D
genSubstMat :: Double -> Double -> Mat44D
genSubstMat p :: Double
p q :: Double
q = Vector Double -> Mat44D
Mat44D (Vector Double -> Mat44D) -> Vector Double -> Mat44D
forall a b. (a -> b) -> a -> b
$ Int -> [Double] -> Vector Double
forall a. Unbox a => Int -> [a] -> Vector a
U.fromListN 16 [ 1,  0,   Double
q,  0
                                          , 0, 1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
p,  0,  0
                                          , 0,  0,  1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
q, 0
                                          , 0,  Double
p,   0,  1 ]

univDamage :: DamageParameters Double -> DamageModel
univDamage :: DamageParameters Double -> DamageModel
univDamage DP{..} r :: Bool
r l :: Int
l i :: Int
i = Double -> Double -> Mat44D
genSubstMat (Double
p1Double -> Double -> Double
forall a. Num a => a -> a -> a
+Double
p2) (Double
q1Double -> Double -> Double
forall a. Num a => a -> a -> a
+Double
q2)
    where
        (p1 :: Double
p1, q1 :: Double
q1) = if Bool
r then let lam5 :: Double
lam5 = Double
ssd_lambda Double -> Int -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i)
                                 lam3 :: Double
lam3 = Double
ssd_kappa Double -> Int -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ (1Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
                                 lam :: Double
lam  = Double
lam3 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
lam5 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lam3 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
lam5
                                 p :: Double
p    = Double
ssd_sigma Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
lam Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
ssd_delta Double -> Double -> Double
forall a. Num a => a -> a -> a
* (1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
lam)
                             in (0,Double
p)
                        else let lam5 :: Double
lam5 = Double
ssd_lambda Double -> Int -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ (1Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
                                 lam3 :: Double
lam3 = Double
ssd_kappa Double -> Int -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i)
                                 lam :: Double
lam  = Double
lam3 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
lam5 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lam3 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
lam5
                                 p :: Double
p    = Double
ssd_sigma Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
lam Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
ssd_delta Double -> Double -> Double
forall a. Num a => a -> a -> a
* (1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
lam)
                             in (Double
p,0)

        p2 :: Double
p2      = Double
dsd_sigma Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
lam5_ds Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
dsd_delta Double -> Double -> Double
forall a. Num a => a -> a -> a
* (1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
lam5_ds)
        q2 :: Double
q2      = Double
dsd_sigma Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
lam3_ds Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
dsd_delta Double -> Double -> Double
forall a. Num a => a -> a -> a
* (1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
lam3_ds)
        lam5_ds :: Double
lam5_ds = Double
dsd_lambda Double -> Int -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ (1Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i)
        lam3_ds :: Double
lam3_ds = Double
dsd_lambda Double -> Int -> Double
forall a b. (Num a, Integral b) => a -> b -> a
^ (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i)

empDamage :: NewDamageParameters U.Vector Double -> DamageModel
empDamage :: NewDamageParameters Vector Double -> DamageModel
empDamage NDP{..} =
    \r :: Bool
r l :: Int
l i :: Int
i -> if Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l then
                if Bool
r then Mat44D -> Maybe Mat44D -> Mat44D
forall a. a -> Maybe a -> a
fromMaybe Mat44D
middleRev (Vector Mat44D
rev5 Vector Mat44D -> Int -> Maybe Mat44D
forall a. Vector a -> Int -> Maybe a
V.!? Int
i)
                     else Mat44D -> Maybe Mat44D -> Mat44D
forall a. a -> Maybe a -> a
fromMaybe Mat44D
middle    (Vector Mat44D
fwd5 Vector Mat44D -> Int -> Maybe Mat44D
forall a. Vector a -> Int -> Maybe a
V.!? Int
i)
              else
                if Bool
r then Mat44D -> Maybe Mat44D -> Mat44D
forall a. a -> Maybe a -> a
fromMaybe Mat44D
middleRev (Vector Mat44D
rev3 Vector Mat44D -> Int -> Maybe Mat44D
forall a. Vector a -> Int -> Maybe a
V.!? (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-1))
                     else Mat44D -> Maybe Mat44D -> Mat44D
forall a. a -> Maybe a -> a
fromMaybe Mat44D
middle    (Vector Mat44D
fwd3 Vector Mat44D -> Int -> Maybe Mat44D
forall a. Vector a -> Int -> Maybe a
V.!? (Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-1))
  where
    !middle :: Mat44D
middle    = Double -> Double -> Mat44D
genSubstMat' Double
dp_alpha Double
dp_beta
    !middleRev :: Mat44D
middleRev = Double -> Double -> Mat44D
genSubstMat' Double
dp_beta Double
dp_alpha

    !fwd5 :: Vector Mat44D
fwd5 = (Double -> Double -> Mat44D)
-> Vector Double -> Vector Double -> Vector Mat44D
forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith Double -> Double -> Mat44D
genSubstMat' (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_alpha5) (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_beta5)
    !fwd3 :: Vector Mat44D
fwd3 = (Double -> Double -> Mat44D)
-> Vector Double -> Vector Double -> Vector Mat44D
forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith Double -> Double -> Mat44D
genSubstMat' (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_alpha3) (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_beta3)

    !rev5 :: Vector Mat44D
rev5 = (Double -> Double -> Mat44D)
-> Vector Double -> Vector Double -> Vector Mat44D
forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith Double -> Double -> Mat44D
genSubstMat' (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_beta5) (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_alpha5)
    !rev3 :: Vector Mat44D
rev3 = (Double -> Double -> Mat44D)
-> Vector Double -> Vector Double -> Vector Mat44D
forall a b c. (a -> b -> c) -> Vector a -> Vector b -> Vector c
V.zipWith Double -> Double -> Mat44D
genSubstMat' (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_beta3) (Vector Double -> Vector Double
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
G.convert Vector Double
dp_alpha3)

    genSubstMat' :: Double -> Double -> Mat44D
genSubstMat' a :: Double
a b :: Double
b = Double -> Double -> Mat44D
genSubstMat (Double -> Double
forall a. Fractional a => a -> a
recip (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ 1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double -> Double
forall a. Floating a => a -> a
exp (-Double
a)) (Double -> Double
forall a. Fractional a => a -> a
recip (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ 1 Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double -> Double
forall a. Floating a => a -> a
exp (-Double
b))


-- | Collected \"traditional\" statistics:
--
-- * Base composition near 5' end and near 3' end.  Each consists of
--   five vectors of counts of A,C,G,T, and everything else.
--   'basecompo5' begins with 'context' bases to the left of the 5' end,
--   'basecompo3' ends with 'context' bases to the right of the 3' end.
--
-- * Substitutions.  Counted from the reconstructed alignment, once
--   around the 5' end and once around the 3' end.  For a total of 2*4*4
--   different substitutions.  Positions where the query has a gap are
--   skipped.
--
-- * Substitutions at CpG motifs.  Also counted from the reconstructed
--   alignment, and a CpG site is simply the sequence CG in the
--   reference.  Gaps may confuse that definition, so that CpHpG still
--   counts as CpG, because the H is gapped.  That might actually
--   be desirable.
--
-- * Conditional substitutions.  The 5' and 3' ends count as damaged if
--   the very last position has a C-to-T substitution.  With that in
--   mind, 'substs5d5', 'substs5d3', 'substs5dd' are like 'substs5', but
--   counting only reads where the 5' end is damaged, where the 3' end
--   is damaged, and where both ends are damaged, respectively.

data DmgStats = DmgStats {
    DmgStats -> CompositionStats
basecompo5 :: CompositionStats,
    DmgStats -> CompositionStats
basecompo3 :: CompositionStats,
    DmgStats -> SubstitutionStats
substs5    :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs3    :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs5d5  :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs3d5  :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs5d3  :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs3d3  :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs5dd  :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs3dd  :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs5cpg :: SubstitutionStats,
    DmgStats -> SubstitutionStats
substs3cpg :: SubstitutionStats }
  deriving Int -> DmgStats -> ShowS
[DmgStats] -> ShowS
DmgStats -> String
(Int -> DmgStats -> ShowS)
-> (DmgStats -> String) -> ([DmgStats] -> ShowS) -> Show DmgStats
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DmgStats] -> ShowS
$cshowList :: [DmgStats] -> ShowS
show :: DmgStats -> String
$cshow :: DmgStats -> String
showsPrec :: Int -> DmgStats -> ShowS
$cshowsPrec :: Int -> DmgStats -> ShowS
Show

type CompositionStats  = [( Maybe Nucleotide, U.Vector Int )]
type SubstitutionStats = [( Subst, U.Vector Int )]


data FragType = Complete | Leading | Trailing deriving (Int -> FragType -> ShowS
[FragType] -> ShowS
FragType -> String
(Int -> FragType -> ShowS)
-> (FragType -> String) -> ([FragType] -> ShowS) -> Show FragType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [FragType] -> ShowS
$cshowList :: [FragType] -> ShowS
show :: FragType -> String
$cshow :: FragType -> String
showsPrec :: Int -> FragType -> ShowS
$cshowsPrec :: Int -> FragType -> ShowS
Show, FragType -> FragType -> Bool
(FragType -> FragType -> Bool)
-> (FragType -> FragType -> Bool) -> Eq FragType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: FragType -> FragType -> Bool
$c/= :: FragType -> FragType -> Bool
== :: FragType -> FragType -> Bool
$c== :: FragType -> FragType -> Bool
Eq)

-- | Compact storage of a pair of ambiguous 'Nucleotides'.  Used to
-- represent alignments in a way that is accessible even to assembly
-- code.  The first and sencond field are stored in the low and high
-- nybble, respectively.  See 'fst_np', 'snd_np', 'npair'.
newtype NPair = NPair Word8 deriving (NPair -> NPair -> Bool
(NPair -> NPair -> Bool) -> (NPair -> NPair -> Bool) -> Eq NPair
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: NPair -> NPair -> Bool
$c/= :: NPair -> NPair -> Bool
== :: NPair -> NPair -> Bool
$c== :: NPair -> NPair -> Bool
Eq, Eq NPair
Eq NPair =>
(NPair -> NPair -> Ordering)
-> (NPair -> NPair -> Bool)
-> (NPair -> NPair -> Bool)
-> (NPair -> NPair -> Bool)
-> (NPair -> NPair -> Bool)
-> (NPair -> NPair -> NPair)
-> (NPair -> NPair -> NPair)
-> Ord NPair
NPair -> NPair -> Bool
NPair -> NPair -> Ordering
NPair -> NPair -> NPair
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: NPair -> NPair -> NPair
$cmin :: NPair -> NPair -> NPair
max :: NPair -> NPair -> NPair
$cmax :: NPair -> NPair -> NPair
>= :: NPair -> NPair -> Bool
$c>= :: NPair -> NPair -> Bool
> :: NPair -> NPair -> Bool
$c> :: NPair -> NPair -> Bool
<= :: NPair -> NPair -> Bool
$c<= :: NPair -> NPair -> Bool
< :: NPair -> NPair -> Bool
$c< :: NPair -> NPair -> Bool
compare :: NPair -> NPair -> Ordering
$ccompare :: NPair -> NPair -> Ordering
$cp1Ord :: Eq NPair
Ord)

npair :: Nucleotides -> Nucleotides -> NPair
npair :: Nucleotides -> Nucleotides -> NPair
npair (Ns r :: Word8
r) (Ns q :: Word8
q) = Word8 -> NPair
NPair (Word8 -> NPair) -> Word8 -> NPair
forall a b. (a -> b) -> a -> b
$ Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftL Word8
q 4 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8
r Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. 0xF

fst_np, snd_np :: NPair -> Nucleotides
fst_np :: NPair -> Nucleotides
fst_np (NPair w :: Word8
w) = Word8 -> Nucleotides
Ns (Word8
w Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. 0xF)
snd_np :: NPair -> Nucleotides
snd_np (NPair w :: Word8
w) = Word8 -> Nucleotides
Ns (Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftR Word8
w 4)

instance Storable NPair where
    sizeOf :: NPair -> Int
sizeOf    _ = 1
    alignment :: NPair -> Int
alignment _ = 1
    peek :: Ptr NPair -> IO NPair
peek p :: Ptr NPair
p = Word8 -> NPair
NPair (Word8 -> NPair) -> IO Word8 -> IO NPair
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek (Ptr NPair -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr NPair
p :: Ptr Word8)
    poke :: Ptr NPair -> NPair -> IO ()
poke p :: Ptr NPair
p (NPair v :: Word8
v) = Ptr Word8 -> Word8 -> IO ()
forall a. Storable a => Ptr a -> a -> IO ()
poke (Ptr NPair -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr NPair
p :: Ptr Word8) Word8
v

instance Show NPair where
    showsPrec :: Int -> NPair -> ShowS
showsPrec _ p :: NPair
p = Nucleotides -> ShowS
forall a. Show a => a -> ShowS
shows (NPair -> Nucleotides
fst_np NPair
p) ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (:) '/' ShowS -> ShowS -> ShowS
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Nucleotides -> ShowS
forall a. Show a => a -> ShowS
shows (NPair -> Nucleotides
snd_np NPair
p)

-- | Alignment record.  The reference sequence is filled with Ns if
-- missing.
data Alignment = ALN
    { Alignment -> Vector NPair
a_sequence :: !(VS.Vector NPair)      -- the alignment proper
    , Alignment -> FragType
a_fragment_type :: !FragType }        -- was the adapter trimmed?

addFragType :: Monad m => BamMeta -> Stream (Of BamRaw) m b -> Stream (Of (BamRaw,FragType)) m b
addFragType :: BamMeta
-> Stream (Of BamRaw) m b -> Stream (Of (BamRaw, FragType)) m b
addFragType meta :: BamMeta
meta = (BamRaw -> (BamRaw, FragType))
-> Stream (Of BamRaw) m b -> Stream (Of (BamRaw, FragType)) m b
forall (m :: * -> *) a b r.
Monad m =>
(a -> b) -> Stream (Of a) m r -> Stream (Of b) m r
Q.map ((BamRaw -> (BamRaw, FragType))
 -> Stream (Of BamRaw) m b -> Stream (Of (BamRaw, FragType)) m b)
-> (BamRaw -> (BamRaw, FragType))
-> Stream (Of BamRaw) m b
-> Stream (Of (BamRaw, FragType)) m b
forall a b. (a -> b) -> a -> b
$ \br :: BamRaw
br -> (BamRaw
br, case BamRaw -> BamRec
unpackBam BamRaw
br of
    b :: BamRec
b | BamRec -> Bool
isFirstMate  BamRec
b Bool -> Bool -> Bool
&& BamRec -> Bool
isPaired     BamRec
b -> FragType
Leading
      | BamRec -> Bool
isSecondMate BamRec
b Bool -> Bool -> Bool
&& BamRec -> Bool
isPaired     BamRec
b -> FragType
Trailing
      | Bool -> Bool
not Bool
sane                         -> FragType
Complete     -- leeHom fscked it up
      | BamRec -> Bool
isFirstMate  BamRec
b Bool -> Bool -> Bool
|| BamRec -> Bool
isSecondMate BamRec
b -> FragType
Complete     -- old style flagging
      | BamRec -> Bool
isTrimmed    BamRec
b Bool -> Bool -> Bool
|| BamRec -> Bool
isMerged     BamRec
b -> FragType
Complete     -- new style flagging
      | Bool
otherwise                        -> FragType
Leading)
  where
    sane :: Bool
sane = [()] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ () | ("PG",line :: BamOtherShit
line) <- BamMeta -> [(BamKey, BamOtherShit)]
meta_other_shit BamMeta
meta
                     , ("PN","mergeTrimReadsBAM") <- BamOtherShit
line ]

-- | Stream transformer that computes some statistics from plain BAM
-- (no MD field needed) and a 2bit file.  The 'Alignment' is also
-- reconstructed and passed downstream.  The final value of the source
-- stream ends up in the 'stats_more' field of the result.
--
-- * Get the reference sequence including both contexts once.  If this
--   includes invalid sequence (negative coordinate), pad suitably.
-- * Accumulate counts for the valid parts around 5' and 3' ends as
--   appropriate from flags and config.
-- * Combine the part that was aligned to (so no context) with the read
--   to reconstruct the alignment.
--
-- Arguments are the table of reference names, the 2bit file with the
-- reference, the amount of context outside the alignment desired, and
-- the amount of context inside desired.
--
-- For 'Complete' fragments, we cut the read in the middle, so the 5'
-- and 3' plots stay clean from each other's influence.  'Leading' and
-- 'Trailing' fragments count completely towards the appropriate end.

damagePatternsIter2Bit :: MonadIO m
                       => Refs -> TwoBitFile -> Int -> Int
                       -> Stream (Of (BamRaw,FragType)) m x
                       -> Stream (Of Alignment) m DmgStats
damagePatternsIter2Bit :: Refs
-> TwoBitFile
-> Int
-> Int
-> Stream (Of (BamRaw, FragType)) m x
-> Stream (Of Alignment) m DmgStats
damagePatternsIter2Bit refs :: Refs
refs tbf :: TwoBitFile
tbf ctx :: Int
ctx rng :: Int
rng =
    Int
-> Int
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> Stream (Of Alignment) m DmgStats
forall (m :: * -> *) x.
MonadIO m =>
Int
-> Int
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> Stream (Of Alignment) m DmgStats
damagePatternsIter Int
ctx Int
rng (Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
 -> Stream (Of Alignment) m DmgStats)
-> (Stream (Of (BamRaw, FragType)) m x
    -> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x)
-> Stream (Of (BamRaw, FragType)) m x
-> Stream (Of Alignment) m DmgStats
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.
    ((BamRaw, FragType)
 -> Maybe (BamRec, FragType, Vector Word8, Vector NPair))
-> Stream (Of (BamRaw, FragType)) m x
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
forall (m :: * -> *) a b r.
Monad m =>
(a -> Maybe b) -> Stream (Of a) m r -> Stream (Of b) m r
Q.mapMaybe (\(br :: BamRaw
br,ft :: FragType
ft) -> do
        let b :: BamRec
b@BamRec{..} = BamRaw -> BamRec
unpackBam BamRaw
br
        Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ BamRec -> Bool
isUnmapped BamRec
b)
        let ref_nm :: Bytes
ref_nm = BamSQ -> Bytes
sq_name (BamSQ -> Bytes) -> BamSQ -> Bytes
forall a b. (a -> b) -> a -> b
$ Refs -> Refseq -> BamSQ
getRef Refs
refs Refseq
b_rname
            ref :: Vector Word8
ref    = TwoBitFile -> Bytes -> Int -> Int -> Vector Word8
getFragment TwoBitFile
tbf Bytes
ref_nm (Int
b_pos Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ctx) (Vector Cigar -> Int
forall (v :: * -> *). Vector v Cigar => v Cigar -> Int
alignedLength Vector Cigar
b_cigar Int -> Int -> Int
forall a. Num a => a -> a -> a
+ 2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
ctx)
            pps :: Vector NPair
pps    = Vector Word8
-> Vector_Nucs_half Nucleotides -> Vector Cigar -> Vector NPair
aln_from_ref (Int -> Vector Word8 -> Vector Word8
forall a. Unbox a => Int -> Vector a -> Vector a
U.drop Int
ctx Vector Word8
ref) Vector_Nucs_half Nucleotides
b_seq Vector Cigar
b_cigar
        (BamRec, FragType, Vector Word8, Vector NPair)
-> Maybe (BamRec, FragType, Vector Word8, Vector NPair)
forall (m :: * -> *) a. Monad m => a -> m a
return (BamRec
b, FragType
ft, Vector Word8
ref, Vector NPair
pps))

-- | Stream transformer that computes some statistics from plain BAM
-- with a valid MD field.  The 'Alignment' is also reconstructed and
-- passed downstream.  The final value of the source stream becomes
-- the 'stats_more' field of the result.
--
-- * Reconstruct the alignment from CIGAR, SEQ, and MD.
-- * Filter the alignment to get the reference sequence, accumulate it.
-- * Accumulate everything over the alignment.
--
-- The argument is the amount of context inside desired.
--
-- For 'Complete' fragments, we cut the read in the middle, so the 5'
-- and 3' plots stay clean from each other's influence.  'Leading' and
-- 'Trailing' fragments count completely towards the appropriate end.

damagePatternsIterMD :: MonadIO m
                     => Int
                     -> Stream (Of (BamRaw,FragType)) m x
                     -> Stream (Of Alignment) m DmgStats
damagePatternsIterMD :: Int
-> Stream (Of (BamRaw, FragType)) m x
-> Stream (Of Alignment) m DmgStats
damagePatternsIterMD rng :: Int
rng =
    Int
-> Int
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> Stream (Of Alignment) m DmgStats
forall (m :: * -> *) x.
MonadIO m =>
Int
-> Int
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> Stream (Of Alignment) m DmgStats
damagePatternsIter 0 Int
rng (Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
 -> Stream (Of Alignment) m DmgStats)
-> (Stream (Of (BamRaw, FragType)) m x
    -> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x)
-> Stream (Of (BamRaw, FragType)) m x
-> Stream (Of Alignment) m DmgStats
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.
    ((BamRaw, FragType)
 -> Maybe (BamRec, FragType, Vector Word8, Vector NPair))
-> Stream (Of (BamRaw, FragType)) m x
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
forall (m :: * -> *) a b r.
Monad m =>
(a -> Maybe b) -> Stream (Of a) m r -> Stream (Of b) m r
Q.mapMaybe (\(br :: BamRaw
br,ft :: FragType
ft) -> do
        let b :: BamRec
b@BamRec{..} = BamRaw -> BamRec
unpackBam BamRaw
br
        Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ BamRec -> Bool
isUnmapped BamRec
b)
        [MdOp]
md <- BamRec -> Maybe [MdOp]
getMd BamRec
b
        let pps :: Vector NPair
pps = Vector_Nucs_half Nucleotides
-> Vector Cigar -> [MdOp] -> Vector NPair
alnFromMd Vector_Nucs_half Nucleotides
b_seq Vector Cigar
b_cigar [MdOp]
md
            ref :: Vector Word8
ref = Vector Word8 -> Vector Word8
forall (v :: * -> *) a (w :: * -> *).
(Vector v a, Vector w a) =>
v a -> w a
U.convert (Vector Word8 -> Vector Word8) -> Vector Word8 -> Vector Word8
forall a b. (a -> b) -> a -> b
$ (Nucleotides -> Word8) -> Vector Nucleotides -> Vector Word8
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
VS.map Nucleotides -> Word8
forall p. Num p => Nucleotides -> p
fromN (Vector Nucleotides -> Vector Word8)
-> Vector Nucleotides -> Vector Word8
forall a b. (a -> b) -> a -> b
$ (Nucleotides -> Bool) -> Vector Nucleotides -> Vector Nucleotides
forall a. Storable a => (a -> Bool) -> Vector a -> Vector a
VS.filter (Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
(/=) Nucleotides
gap) (Vector Nucleotides -> Vector Nucleotides)
-> Vector Nucleotides -> Vector Nucleotides
forall a b. (a -> b) -> a -> b
$ (NPair -> Nucleotides) -> Vector NPair -> Vector Nucleotides
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
VS.map NPair -> Nucleotides
fst_np Vector NPair
pps
        (BamRec, FragType, Vector Word8, Vector NPair)
-> Maybe (BamRec, FragType, Vector Word8, Vector NPair)
forall (m :: * -> *) a. Monad m => a -> m a
return (BamRec
b, FragType
ft, Vector Word8
ref, Vector NPair
pps))
  where
    fromN :: Nucleotides -> p
fromN ns :: Nucleotides
ns | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsA = 2
             | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsC = 1
             | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsG = 3
             | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsT = 0
             | Bool
otherwise   = 4

-- | Common logic for statistics. The function 'get_ref_and_aln'
-- reconstructs reference sequence and alignment from a Bam record.  It
-- is expected to construct the alignment with respect to the forwards
-- strand of the reference; we reverse-complement it if necessary.
damagePatternsIter :: MonadIO m
                   => Int -> Int
                   -> Stream (Of (BamRec, FragType, U.Vector Word8, VS.Vector NPair)) m x
                   -> Stream (Of Alignment) m DmgStats
damagePatternsIter :: Int
-> Int
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> Stream (Of Alignment) m DmgStats
damagePatternsIter ctx :: Int
ctx rng :: Int
rng stream :: Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
stream = do
    let maxwidth :: Int
maxwidth = Int
ctx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
rng
    MVector RealWorld Int
acc_bc <- IO (MVector RealWorld Int)
-> Stream (Of Alignment) m (MVector RealWorld Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (MVector RealWorld Int)
 -> Stream (Of Alignment) m (MVector RealWorld Int))
-> IO (MVector RealWorld Int)
-> Stream (Of Alignment) m (MVector RealWorld Int)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> IO (MVector (PrimState IO) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
UM.replicate (2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 5 Int -> Int -> Int
forall a. Num a => a -> a -> a
*    Int
maxwidth) (0::Int)
    MVector RealWorld Int
acc_st <- IO (MVector RealWorld Int)
-> Stream (Of Alignment) m (MVector RealWorld Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (MVector RealWorld Int)
 -> Stream (Of Alignment) m (MVector RealWorld Int))
-> IO (MVector RealWorld Int)
-> Stream (Of Alignment) m (MVector RealWorld Int)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> IO (MVector (PrimState IO) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
UM.replicate (2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rng) (0::Int)
    MVector RealWorld Int
acc_cg <- IO (MVector RealWorld Int)
-> Stream (Of Alignment) m (MVector RealWorld Int)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (MVector RealWorld Int)
 -> Stream (Of Alignment) m (MVector RealWorld Int))
-> IO (MVector RealWorld Int)
-> Stream (Of Alignment) m (MVector RealWorld Int)
forall a b. (a -> b) -> a -> b
$ Int -> Int -> IO (MVector (PrimState IO) Int)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
UM.replicate (2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 2 Int -> Int -> Int
forall a. Num a => a -> a -> a
* 4 Int -> Int -> Int
forall a. Num a => a -> a -> a
*     Int
rng) (0::Int)

    Stream (Of Alignment) m x -> Stream (Of Alignment) m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Stream (Of Alignment) m x -> Stream (Of Alignment) m ())
-> Stream (Of Alignment) m x -> Stream (Of Alignment) m ()
forall a b. (a -> b) -> a -> b
$ (((BamRec, FragType, Vector Word8, Vector NPair) -> m Alignment)
 -> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
 -> Stream (Of Alignment) m x)
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> ((BamRec, FragType, Vector Word8, Vector NPair) -> m Alignment)
-> Stream (Of Alignment) m x
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((BamRec, FragType, Vector Word8, Vector NPair) -> m Alignment)
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> Stream (Of Alignment) m x
forall (m :: * -> *) a b r.
Monad m =>
(a -> m b) -> Stream (Of a) m r -> Stream (Of b) m r
Q.mapM (((BamRec, FragType, Vector Word8, Vector NPair)
 -> (BamRec, FragType, Vector Word8, Vector NPair))
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
-> Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
forall (m :: * -> *) a b r.
Monad m =>
(a -> b) -> Stream (Of a) m r -> Stream (Of b) m r
Q.map (BamRec, FragType, Vector Word8, Vector NPair)
-> (BamRec, FragType, Vector Word8, Vector NPair)
revcom_both Stream (Of (BamRec, FragType, Vector Word8, Vector NPair)) m x
stream) (((BamRec, FragType, Vector Word8, Vector NPair) -> m Alignment)
 -> Stream (Of Alignment) m x)
-> ((BamRec, FragType, Vector Word8, Vector NPair) -> m Alignment)
-> Stream (Of Alignment) m x
forall a b. (a -> b) -> a -> b
$
             \(BamRec{..}, a_fragment_type :: FragType
a_fragment_type, ref :: Vector Word8
ref, a_sequence :: Vector NPair
a_sequence) -> IO Alignment -> m Alignment
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Alignment -> m Alignment) -> IO Alignment -> m Alignment
forall a b. (a -> b) -> a -> b
$ do
                  -- basecompositon near 5' end, near 3' end
                  let (width5 :: Int
width5, width3 :: Int
width3) = case FragType
a_fragment_type of
                                                Leading -> (Int
full_width, 0)
                                                Trailing -> (0, Int
full_width)
                                                Complete -> (Int
half_width, Int
half_width)
                            where full_width :: Int
full_width = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector Word8 -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Word8
ref) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int
ctx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
rng (Vector Cigar -> Int
forall (v :: * -> *). Vector v Cigar => v Cigar -> Int
alignedLength Vector Cigar
b_cigar)
                                  half_width :: Int
half_width = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Vector Word8 -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Word8
ref) (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Int
ctx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
rng (Vector Cigar -> Int
forall (v :: * -> *). Vector v Cigar => v Cigar -> Int
alignedLength Vector Cigar
b_cigar Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 2)
                  (Int -> IO ()) -> [Int] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (\i :: Int
i -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector Word8
ref Vector Word8 -> Int -> Word8
forall a. Unbox a => Vector a -> Int -> a
U.!  Int
i                   ) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
maxwidth Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) MVector RealWorld Int
MVector (PrimState IO) Int
acc_bc) [0 .. Int
width5Int -> Int -> Int
forall a. Num a => a -> a -> a
-1]
                  (Int -> IO ()) -> [Int] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (\i :: Int
i -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Vector Word8
ref Vector Word8 -> Int -> Word8
forall a. Unbox a => Vector a -> Int -> a
U.! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Vector Word8 -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector Word8
ref) Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+6) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
maxwidth Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) MVector RealWorld Int
MVector (PrimState IO) Int
acc_bc) [-Int
width3 .. -1]

                  -- For substitutions, decide what damage class we're in:
                  -- 0 - no damage, 1 - damaged 5' end, 2 - damaged 3' end, 3 - both
                  let dmgbase :: Int
dmgbase = 2Int -> Int -> Int
forall a. Num a => a -> a -> a
*4Int -> Int -> Int
forall a. Num a => a -> a -> a
*4Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
*
                                ((if Vector NPair -> Bool
forall a. Storable a => Vector a -> Bool
VS.null Vector NPair
a_sequence Bool -> Bool -> Bool
|| Vector NPair -> NPair
forall a. Storable a => Vector a -> a
VS.head Vector NPair
a_sequence NPair -> NPair -> Bool
forall a. Eq a => a -> a -> Bool
/= Nucleotides -> Nucleotides -> NPair
npair Nucleotides
nucsC Nucleotides
nucsT then 1 else 0)
                                Int -> Int -> Int
forall a. Num a => a -> a -> a
+(if Vector NPair -> Bool
forall a. Storable a => Vector a -> Bool
VS.null Vector NPair
a_sequence Bool -> Bool -> Bool
|| Vector NPair -> NPair
forall a. Storable a => Vector a -> a
VS.last Vector NPair
a_sequence NPair -> NPair -> Bool
forall a. Eq a => a -> a -> Bool
/= Nucleotides -> Nucleotides -> NPair
npair Nucleotides
nucsC Nucleotides
nucsT then 2 else 0))

                  -- substitutions near 5' end
                  let len_at_5 :: Int
len_at_5 = case FragType
a_fragment_type of Leading  -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
rng (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector_Nucs_half Nucleotides
b_seq)
                                                         Complete -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
rng (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector_Nucs_half Nucleotides
b_seq Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 2)
                                                         Trailing -> 0
                  ((Int -> NPair -> IO ()) -> Vector NPair -> IO ())
-> Vector NPair -> (Int -> NPair -> IO ()) -> IO ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int -> NPair -> IO ()) -> Vector NPair -> IO ()
forall (m :: * -> *) (v :: * -> *) a b.
(Monad m, Vector v a) =>
(Int -> a -> m b) -> v a -> m ()
G.imapM_ (Int -> Vector NPair -> Vector NPair
forall a. Storable a => Int -> Vector a -> Vector a
VS.take Int
len_at_5 Vector NPair
a_sequence) ((Int -> NPair -> IO ()) -> IO ())
-> (Int -> NPair -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$
                        \i :: Int
i uv :: NPair
uv -> NPair -> (Int -> IO ()) -> IO ()
forall (f :: * -> *).
Applicative f =>
NPair -> (Int -> f ()) -> f ()
withPair NPair
uv ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \j :: Int
j -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
dmgbase) MVector RealWorld Int
MVector (PrimState IO) Int
acc_st

                  -- substitutions at CpG sites near 5' end
                  (Int -> NPair -> NPair -> IO ())
-> Vector NPair -> Vector NPair -> IO ()
forall (m :: * -> *) (v :: * -> *) a b c.
(Monad m, Vector v a, Vector v b) =>
(Int -> a -> b -> m c) -> v a -> v b -> m ()
G.izipWithM_
                      (\i :: Int
i uv :: NPair
uv wz :: NPair
wz ->
                          Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (NPair -> Nucleotides
fst_np NPair
uv Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsC Bool -> Bool -> Bool
&& NPair -> Nucleotides
fst_np NPair
wz Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsG) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
                              Nucleotides -> (Int -> IO ()) -> IO ()
forall t (m :: * -> *).
(Num t, Monad m) =>
Nucleotides -> (t -> m ()) -> m ()
withNs (NPair -> Nucleotides
snd_np NPair
uv) ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \y :: Int
y -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump (  Int
y   Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
+  Int
i ) MVector RealWorld Int
MVector (PrimState IO) Int
acc_cg
                              Nucleotides -> (Int -> IO ()) -> IO ()
forall t (m :: * -> *).
(Num t, Monad m) =>
Nucleotides -> (t -> m ()) -> m ()
withNs (NPair -> Nucleotides
snd_np NPair
wz) ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \y :: Int
y -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump ((Int
yInt -> Int -> Int
forall a. Num a => a -> a -> a
+4) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+1) MVector RealWorld Int
MVector (PrimState IO) Int
acc_cg)
                      (Int -> Vector NPair -> Vector NPair
forall a. Storable a => Int -> Vector a -> Vector a
VS.take Int
len_at_5 Vector NPair
a_sequence) (Int -> Vector NPair -> Vector NPair
forall a. Storable a => Int -> Vector a -> Vector a
VS.drop 1 Vector NPair
a_sequence)

                  -- substitutions near 3' end
                  let len_at_3 :: Int
len_at_3 = case FragType
a_fragment_type of Leading  -> 0
                                                         Complete -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
rng (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector_Nucs_half Nucleotides
b_seq Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` 2)
                                                         Trailing -> Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
rng (Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector_Nucs_half Nucleotides
b_seq)
                  ((Int -> NPair -> IO ()) -> Vector NPair -> IO ())
-> Vector NPair -> (Int -> NPair -> IO ()) -> IO ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int -> NPair -> IO ()) -> Vector NPair -> IO ()
forall (m :: * -> *) (v :: * -> *) a b.
(Monad m, Vector v a) =>
(Int -> a -> m b) -> v a -> m ()
G.imapM_ (Int -> Vector NPair -> Vector NPair
forall a. Storable a => Int -> Vector a -> Vector a
VS.take Int
len_at_3 (Vector NPair -> Vector NPair
forall a. Storable a => Vector a -> Vector a
VS.reverse Vector NPair
a_sequence)) ((Int -> NPair -> IO ()) -> IO ())
-> (Int -> NPair -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$
                        \i :: Int
i uv :: NPair
uv -> NPair -> (Int -> IO ()) -> IO ()
forall (f :: * -> *).
Applicative f =>
NPair -> (Int -> f ()) -> f ()
withPair NPair
uv ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \j :: Int
j -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump ((17Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
-1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
dmgbase) MVector RealWorld Int
MVector (PrimState IO) Int
acc_st

                  -- substitutions at CpG sites near 3' end
                  (Int -> NPair -> NPair -> IO ())
-> Vector NPair -> Vector NPair -> IO ()
forall (m :: * -> *) (v :: * -> *) a b c.
(Monad m, Vector v a, Vector v b) =>
(Int -> a -> b -> m c) -> v a -> v b -> m ()
G.izipWithM_
                      (\i :: Int
i wz :: NPair
wz uv :: NPair
uv ->
                          Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (NPair -> Nucleotides
fst_np NPair
uv Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsC Bool -> Bool -> Bool
&& NPair -> Nucleotides
fst_np NPair
wz Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsG) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$ do
                              Nucleotides -> (Int -> IO ()) -> IO ()
forall t (m :: * -> *).
(Num t, Monad m) =>
Nucleotides -> (t -> m ()) -> m ()
withNs (NPair -> Nucleotides
snd_np NPair
uv) ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \y :: Int
y -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump ((Int
yInt -> Int -> Int
forall a. Num a => a -> a -> a
+ 9) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-2) MVector RealWorld Int
MVector (PrimState IO) Int
acc_cg
                              Nucleotides -> (Int -> IO ()) -> IO ()
forall t (m :: * -> *).
(Num t, Monad m) =>
Nucleotides -> (t -> m ()) -> m ()
withNs (NPair -> Nucleotides
snd_np NPair
wz) ((Int -> IO ()) -> IO ()) -> (Int -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \y :: Int
y -> Int -> MVector (PrimState IO) Int -> IO ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a, Enum a) =>
Int -> MVector (PrimState m) a -> m ()
bump ((Int
yInt -> Int -> Int
forall a. Num a => a -> a -> a
+13) Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) MVector RealWorld Int
MVector (PrimState IO) Int
acc_cg)
                      (Int -> Vector NPair -> Vector NPair
forall a. Storable a => Int -> Vector a -> Vector a
VS.take Int
len_at_3 (Vector NPair -> Vector NPair
forall a. Storable a => Vector a -> Vector a
VS.reverse Vector NPair
a_sequence))
                      (Int -> Vector NPair -> Vector NPair
forall a. Storable a => Int -> Vector a -> Vector a
VS.drop 1 (Vector NPair -> Vector NPair
forall a. Storable a => Vector a -> Vector a
VS.reverse Vector NPair
a_sequence))

                  Alignment -> IO Alignment
forall (m :: * -> *) a. Monad m => a -> m a
return $WALN :: Vector NPair -> FragType -> Alignment
ALN{..}


    let nsubsts :: Int
nsubsts = 4Int -> Int -> Int
forall a. Num a => a -> a -> a
*4Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
rng
        mk_substs :: Int -> IO SubstitutionStats
mk_substs off :: Int
off = [IO (Subst, Vector Int)] -> IO SubstitutionStats
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence [ (,) (Nucleotide
n1 Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
n2) (Vector Int -> (Subst, Vector Int))
-> IO (Vector Int) -> IO (Subst, Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) Int -> IO (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze (Int
-> Int -> MVector (PrimState IO) Int -> MVector (PrimState IO) Int
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
UM.slice ((4Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j)Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
rng Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
*Int
nsubsts) Int
rng MVector RealWorld Int
MVector (PrimState IO) Int
acc_st)
                                 | (i :: Int
i,n1 :: Nucleotide
n1) <- [Int] -> [Nucleotide] -> [(Int, Nucleotide)]
forall a b. [a] -> [b] -> [(a, b)]
zip [0..] [Nucleotide
nucA..Nucleotide
nucT]
                                 , (j :: Int
j,n2 :: Nucleotide
n2) <- [Int] -> [Nucleotide] -> [(Int, Nucleotide)]
forall a b. [a] -> [b] -> [(a, b)]
zip [0..] [Nucleotide
nucA..Nucleotide
nucT] ]

    DmgStats
accs <- IO DmgStats -> Stream (Of Alignment) m DmgStats
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO DmgStats -> Stream (Of Alignment) m DmgStats)
-> IO DmgStats -> Stream (Of Alignment) m DmgStats
forall a b. (a -> b) -> a -> b
$ CompositionStats
-> CompositionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> DmgStats
DmgStats (CompositionStats
 -> CompositionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> SubstitutionStats
 -> DmgStats)
-> IO CompositionStats
-> IO
     (CompositionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [IO (Maybe Nucleotide, Vector Int)] -> IO CompositionStats
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence [ (,) Maybe Nucleotide
nuc (Vector Int -> (Maybe Nucleotide, Vector Int))
-> IO (Vector Int) -> IO (Maybe Nucleotide, Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) Int -> IO (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze (Int -> Int -> MVector RealWorld Int -> MVector RealWorld Int
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
UM.slice (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
*Int
maxwidth) Int
maxwidth MVector RealWorld Int
acc_bc)
                                           | (i :: Int
i,nuc :: Maybe Nucleotide
nuc) <- [Int] -> [Maybe Nucleotide] -> [(Int, Maybe Nucleotide)]
forall a b. [a] -> [b] -> [(a, b)]
zip [2,1,3,0,4] [Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucA,Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucC,Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucG,Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucT,Maybe Nucleotide
forall a. Maybe a
Nothing] ]
                              IO
  (CompositionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO CompositionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [IO (Maybe Nucleotide, Vector Int)] -> IO CompositionStats
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence [ (,) Maybe Nucleotide
nuc (Vector Int -> (Maybe Nucleotide, Vector Int))
-> IO (Vector Int) -> IO (Maybe Nucleotide, Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) Int -> IO (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze (Int -> Int -> MVector RealWorld Int -> MVector RealWorld Int
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
UM.slice (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
*Int
maxwidth) Int
maxwidth MVector RealWorld Int
acc_bc)
                                           | (i :: Int
i,nuc :: Maybe Nucleotide
nuc) <- [Int] -> [Maybe Nucleotide] -> [(Int, Maybe Nucleotide)]
forall a b. [a] -> [b] -> [(a, b)]
zip [7,6,8,5,9] [Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucA,Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucC,Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucG,Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucT,Maybe Nucleotide
forall a. Maybe a
Nothing] ]

                              IO
  (SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO SubstitutionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 0
                              IO
  (SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO SubstitutionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 1
                              IO
  (SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO SubstitutionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 2
                              IO
  (SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO SubstitutionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 3
                              IO
  (SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO SubstitutionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 4
                              IO
  (SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO SubstitutionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> SubstitutionStats
      -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 5
                              IO
  (SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> SubstitutionStats
   -> DmgStats)
-> IO SubstitutionStats
-> IO
     (SubstitutionStats
      -> SubstitutionStats -> SubstitutionStats -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 6
                              IO
  (SubstitutionStats
   -> SubstitutionStats -> SubstitutionStats -> DmgStats)
-> IO SubstitutionStats
-> IO (SubstitutionStats -> SubstitutionStats -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Int -> IO SubstitutionStats
mk_substs 7

                              IO (SubstitutionStats -> SubstitutionStats -> DmgStats)
-> IO SubstitutionStats -> IO (SubstitutionStats -> DmgStats)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [IO (Subst, Vector Int)] -> IO SubstitutionStats
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence [ (,) (Nucleotide
n1 Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
n2) (Vector Int -> (Subst, Vector Int))
-> IO (Vector Int) -> IO (Subst, Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) Int -> IO (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze (Int -> Int -> MVector RealWorld Int -> MVector RealWorld Int
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
UM.slice ((Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j)Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
rng) Int
rng MVector RealWorld Int
acc_cg)
                                           | (i :: Int
i,n1 :: Nucleotide
n1) <- [(0,Nucleotide
nucC), (4,Nucleotide
nucG)]
                                           , (j :: Int
j,n2 :: Nucleotide
n2) <- [Int] -> [Nucleotide] -> [(Int, Nucleotide)]
forall a b. [a] -> [b] -> [(a, b)]
zip [0..] [Nucleotide
nucA..Nucleotide
nucT] ]

                              IO (SubstitutionStats -> DmgStats)
-> IO SubstitutionStats -> IO DmgStats
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [IO (Subst, Vector Int)] -> IO SubstitutionStats
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence [ (,) (Nucleotide
n1 Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
n2) (Vector Int -> (Subst, Vector Int))
-> IO (Vector Int) -> IO (Subst, Vector Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MVector (PrimState IO) Int -> IO (Vector Int)
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
MVector (PrimState m) a -> m (Vector a)
U.unsafeFreeze (Int -> Int -> MVector RealWorld Int -> MVector RealWorld Int
forall a s. Unbox a => Int -> Int -> MVector s a -> MVector s a
UM.slice ((Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
j)Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
rng) Int
rng MVector RealWorld Int
acc_cg)
                                           | (i :: Int
i,n2 :: Nucleotide
n2) <- [(8,Nucleotide
nucC), (12,Nucleotide
nucG)]
                                           , (j :: Int
j,n1 :: Nucleotide
n1) <- [Int] -> [Nucleotide] -> [(Int, Nucleotide)]
forall a b. [a] -> [b] -> [(a, b)]
zip [0..] [Nucleotide
nucA..Nucleotide
nucT] ]

    DmgStats -> Stream (Of Alignment) m DmgStats
forall (m :: * -> *) a. Monad m => a -> m a
return (DmgStats -> Stream (Of Alignment) m DmgStats)
-> DmgStats -> Stream (Of Alignment) m DmgStats
forall a b. (a -> b) -> a -> b
$ DmgStats
accs { substs5 :: SubstitutionStats
substs5   = [SubstitutionStats] -> SubstitutionStats
forall a. Monoid a => [a] -> a
mconcat [ DmgStats -> SubstitutionStats
substs5 DmgStats
accs, DmgStats -> SubstitutionStats
substs5d5 DmgStats
accs, DmgStats -> SubstitutionStats
substs5d3 DmgStats
accs, DmgStats -> SubstitutionStats
substs5dd DmgStats
accs ]
                  , substs3 :: SubstitutionStats
substs3   = [SubstitutionStats] -> SubstitutionStats
forall a. Monoid a => [a] -> a
mconcat [ DmgStats -> SubstitutionStats
substs3 DmgStats
accs, DmgStats -> SubstitutionStats
substs3d5 DmgStats
accs, DmgStats -> SubstitutionStats
substs3d3 DmgStats
accs, DmgStats -> SubstitutionStats
substs3dd DmgStats
accs ]
                  , substs5d5 :: SubstitutionStats
substs5d5 = [SubstitutionStats] -> SubstitutionStats
forall a. Monoid a => [a] -> a
mconcat [ DmgStats -> SubstitutionStats
substs5d5 DmgStats
accs, DmgStats -> SubstitutionStats
substs5dd DmgStats
accs]
                  , substs3d5 :: SubstitutionStats
substs3d5 = [SubstitutionStats] -> SubstitutionStats
forall a. Monoid a => [a] -> a
mconcat [ DmgStats -> SubstitutionStats
substs3d5 DmgStats
accs, DmgStats -> SubstitutionStats
substs3dd DmgStats
accs]
                  , substs5d3 :: SubstitutionStats
substs5d3 = [SubstitutionStats] -> SubstitutionStats
forall a. Monoid a => [a] -> a
mconcat [ DmgStats -> SubstitutionStats
substs5d3 DmgStats
accs, DmgStats -> SubstitutionStats
substs5dd DmgStats
accs]
                  , substs3d3 :: SubstitutionStats
substs3d3 = [SubstitutionStats] -> SubstitutionStats
forall a. Monoid a => [a] -> a
mconcat [ DmgStats -> SubstitutionStats
substs3d3 DmgStats
accs, DmgStats -> SubstitutionStats
substs3dd DmgStats
accs] }
  where
    {-# INLINE withPair #-}
    withPair :: NPair -> (Int -> f ()) -> f ()
withPair (NPair i :: Word8
i) k :: Int -> f ()
k =
        case Vector Int
pairTab Vector Int -> Int -> Int
forall a. Unbox a => Vector a -> Int -> a
`U.unsafeIndex` Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
i of
            j :: Int
j -> Bool -> f () -> f ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0) (Int -> f ()
k Int
j)

    !pairTab :: Vector Int
pairTab = Int -> Int -> Vector Int
forall a. Unbox a => Int -> a -> Vector a
U.replicate 256 (-1) Vector Int -> [(Int, Int)] -> Vector Int
forall a. Unbox a => Vector a -> [(Int, a)] -> Vector a
U.//
            [ (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
i, Int
xInt -> Int -> Int
forall a. Num a => a -> a -> a
*4Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
y) | (u :: Nucleotides
u,x :: Int
x) <- [Nucleotides] -> [Int] -> [(Nucleotides, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Nucleotides
nucsA, Nucleotides
nucsC, Nucleotides
nucsG, Nucleotides
nucsT] [0,1,2,3]
                                      , (v :: Nucleotides
v,y :: Int
y) <- [Nucleotides] -> [Int] -> [(Nucleotides, Int)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Nucleotides
nucsA, Nucleotides
nucsC, Nucleotides
nucsG, Nucleotides
nucsT] [0,1,2,3]
                                      , let NPair i :: Word8
i = Nucleotides -> Nucleotides -> NPair
npair Nucleotides
u Nucleotides
v ]
    {-# INLINE bump #-}
    bump :: Int -> MVector (PrimState m) a -> m ()
bump i :: Int
i v :: MVector (PrimState m) a
v = MVector (PrimState m) a -> Int -> m a
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> m a
UM.unsafeRead MVector (PrimState m) a
v Int
i m a -> (a -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= MVector (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
UM.unsafeWrite MVector (PrimState m) a
v Int
i (a -> m ()) -> (a -> a) -> a -> m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. a -> a
forall a. Enum a => a -> a
succ

    {-# INLINE withNs #-}
    withNs :: Nucleotides -> (t -> m ()) -> m ()
withNs ns :: Nucleotides
ns k :: t -> m ()
k | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsA = t -> m ()
k 0
                | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsC = t -> m ()
k 1
                | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsG = t -> m ()
k 2
                | Nucleotides
ns Nucleotides -> Nucleotides -> Bool
forall a. Eq a => a -> a -> Bool
== Nucleotides
nucsT = t -> m ()
k 3
                | Bool
otherwise   = () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()


instance Monoid DmgStats where
    mappend :: DmgStats -> DmgStats -> DmgStats
mappend = DmgStats -> DmgStats -> DmgStats
forall a. Semigroup a => a -> a -> a
(<>)
    mempty :: DmgStats
mempty = DmgStats :: CompositionStats
-> CompositionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> DmgStats
DmgStats { basecompo5 :: CompositionStats
basecompo5 = CompositionStats
empty_compo
                      , basecompo3 :: CompositionStats
basecompo3 = CompositionStats
empty_compo
                      , substs5 :: SubstitutionStats
substs5    = SubstitutionStats
empty_subst
                      , substs3 :: SubstitutionStats
substs3    = SubstitutionStats
empty_subst
                      , substs5d5 :: SubstitutionStats
substs5d5  = SubstitutionStats
empty_subst
                      , substs3d5 :: SubstitutionStats
substs3d5  = SubstitutionStats
empty_subst
                      , substs5d3 :: SubstitutionStats
substs5d3  = SubstitutionStats
empty_subst
                      , substs3d3 :: SubstitutionStats
substs3d3  = SubstitutionStats
empty_subst
                      , substs5dd :: SubstitutionStats
substs5dd  = SubstitutionStats
empty_subst
                      , substs3dd :: SubstitutionStats
substs3dd  = SubstitutionStats
empty_subst
                      , substs5cpg :: SubstitutionStats
substs5cpg = SubstitutionStats
empty_subst
                      , substs3cpg :: SubstitutionStats
substs3cpg = SubstitutionStats
empty_subst }
      where
        empty_compo :: CompositionStats
empty_compo = [ (Maybe Nucleotide
nuc, Vector Int
forall a. Unbox a => Vector a
U.empty) | Maybe Nucleotide
nuc <- [Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucA, Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucC, Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucG, Nucleotide -> Maybe Nucleotide
forall a. a -> Maybe a
Just Nucleotide
nucT, Maybe Nucleotide
forall a. Maybe a
Nothing] ]
        empty_subst :: SubstitutionStats
empty_subst = [ (Nucleotide
n1 Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
n2, Vector Int
forall a. Unbox a => Vector a
U.empty) | Nucleotide
n1 <- [Nucleotide
nucA..Nucleotide
nucT], Nucleotide
n2 <- [Nucleotide
nucA..Nucleotide
nucT] ]

instance Semigroup DmgStats where
    <> :: DmgStats -> DmgStats -> DmgStats
(<>) = DmgStats -> DmgStats -> DmgStats
merge_dmg_stats

merge_dmg_stats :: DmgStats -> DmgStats -> DmgStats
merge_dmg_stats :: DmgStats -> DmgStats -> DmgStats
merge_dmg_stats a :: DmgStats
a b :: DmgStats
b =
    DmgStats :: CompositionStats
-> CompositionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> SubstitutionStats
-> DmgStats
DmgStats { basecompo5 :: CompositionStats
basecompo5 = ((Maybe Nucleotide, Vector Int)
 -> (Maybe Nucleotide, Vector Int)
 -> (Maybe Nucleotide, Vector Int))
-> CompositionStats -> CompositionStats -> CompositionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Maybe Nucleotide, Vector Int)
-> (Maybe Nucleotide, Vector Int) -> (Maybe Nucleotide, Vector Int)
forall a c.
(Eq a, Unbox c, Num c) =>
(a, Vector c) -> (a, Vector c) -> (a, Vector c)
s1 (DmgStats -> CompositionStats
basecompo5 DmgStats
a) (DmgStats -> CompositionStats
basecompo5 DmgStats
b)
             , basecompo3 :: CompositionStats
basecompo3 = ((Maybe Nucleotide, Vector Int)
 -> (Maybe Nucleotide, Vector Int)
 -> (Maybe Nucleotide, Vector Int))
-> CompositionStats -> CompositionStats -> CompositionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Maybe Nucleotide, Vector Int)
-> (Maybe Nucleotide, Vector Int) -> (Maybe Nucleotide, Vector Int)
forall a c.
(Eq a, Unbox c, Num c) =>
(a, Vector c) -> (a, Vector c) -> (a, Vector c)
s1 (DmgStats -> CompositionStats
basecompo3 DmgStats
a) (DmgStats -> CompositionStats
basecompo3 DmgStats
b)
             , substs5 :: SubstitutionStats
substs5    = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs5    DmgStats
a) (DmgStats -> SubstitutionStats
substs5    DmgStats
b)
             , substs3 :: SubstitutionStats
substs3    = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs3    DmgStats
a) (DmgStats -> SubstitutionStats
substs3    DmgStats
b)
             , substs5d5 :: SubstitutionStats
substs5d5  = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs5d5  DmgStats
a) (DmgStats -> SubstitutionStats
substs5d5  DmgStats
b)
             , substs3d5 :: SubstitutionStats
substs3d5  = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs3d5  DmgStats
a) (DmgStats -> SubstitutionStats
substs3d5  DmgStats
b)
             , substs5d3 :: SubstitutionStats
substs5d3  = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs5d3  DmgStats
a) (DmgStats -> SubstitutionStats
substs5d3  DmgStats
b)
             , substs3d3 :: SubstitutionStats
substs3d3  = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs3d3  DmgStats
a) (DmgStats -> SubstitutionStats
substs3d3  DmgStats
b)
             , substs5dd :: SubstitutionStats
substs5dd  = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs5dd  DmgStats
a) (DmgStats -> SubstitutionStats
substs5dd  DmgStats
b)
             , substs3dd :: SubstitutionStats
substs3dd  = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs3dd  DmgStats
a) (DmgStats -> SubstitutionStats
substs3dd  DmgStats
b)
             , substs5cpg :: SubstitutionStats
substs5cpg = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs5cpg DmgStats
a) (DmgStats -> SubstitutionStats
substs5cpg DmgStats
b)
             , substs3cpg :: SubstitutionStats
substs3cpg = ((Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int))
-> SubstitutionStats -> SubstitutionStats -> SubstitutionStats
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Subst, Vector Int) -> (Subst, Vector Int) -> (Subst, Vector Int)
forall c.
(Unbox c, Num c) =>
(Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (DmgStats -> SubstitutionStats
substs3cpg DmgStats
a) (DmgStats -> SubstitutionStats
substs3cpg DmgStats
b) }
      where
        s1 :: (a, Vector c) -> (a, Vector c) -> (a, Vector c)
s1 (x :: a
x, u :: Vector c
u) (z :: a
z, v :: Vector c
v) | a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
z    = String -> (a, Vector c)
forall a. HasCallStack => String -> a
error "Mismatch in zip.  This is a bug."
                         | Vector c -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector c
u  = (a
x, Vector c
v)
                         | Vector c -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector c
v  = (a
x, Vector c
u)
                         | Bool
otherwise = (a
x, (c -> c -> c) -> Vector c -> Vector c -> Vector c
forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
U.zipWith c -> c -> c
forall a. Num a => a -> a -> a
(+) Vector c
u Vector c
v)

        s2 :: (Subst, Vector c) -> (Subst, Vector c) -> (Subst, Vector c)
s2 (x :: Nucleotide
x :-> y :: Nucleotide
y, u :: Vector c
u) (z :: Nucleotide
z :-> w :: Nucleotide
w, v :: Vector c
v) | Nucleotide
x Nucleotide -> Nucleotide -> Bool
forall a. Eq a => a -> a -> Bool
/= Nucleotide
z Bool -> Bool -> Bool
|| Nucleotide
y Nucleotide -> Nucleotide -> Bool
forall a. Eq a => a -> a -> Bool
/= Nucleotide
w = String -> (Subst, Vector c)
forall a. HasCallStack => String -> a
error "Mismatch in zip.  This is a bug."
                                     | Vector c -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector c
u         = (Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
y, Vector c
v)
                                     | Vector c -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector c
v         = (Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
y, Vector c
u)
                                     | Bool
otherwise        = (Nucleotide
x Nucleotide -> Nucleotide -> Subst
:-> Nucleotide
y, (c -> c -> c) -> Vector c -> Vector c -> Vector c
forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
U.zipWith c -> c -> c
forall a. Num a => a -> a -> a
(+) Vector c
u Vector c
v)


revcom_both :: ( BamRec, FragType, U.Vector Word8, VS.Vector NPair )
            -> ( BamRec, FragType, U.Vector Word8, VS.Vector NPair )
revcom_both :: (BamRec, FragType, Vector Word8, Vector NPair)
-> (BamRec, FragType, Vector Word8, Vector NPair)
revcom_both (b :: BamRec
b, ft :: FragType
ft, ref :: Vector Word8
ref, pps :: Vector NPair
pps)
    | BamRec -> Bool
isReversed BamRec
b = ( BamRec
b, FragType
ft, Vector Word8 -> Vector Word8
revcom_ref Vector Word8
ref, Vector NPair -> Vector NPair
revcom_pairs Vector NPair
pps )
    | Bool
otherwise    = ( BamRec
b, FragType
ft,            Vector Word8
ref,              Vector NPair
pps )
  where
    revcom_ref :: Vector Word8 -> Vector Word8
revcom_ref   =  Vector Word8 -> Vector Word8
forall a. Unbox a => Vector a -> Vector a
U.reverse (Vector Word8 -> Vector Word8)
-> (Vector Word8 -> Vector Word8) -> Vector Word8 -> Vector Word8
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
.  (Word8 -> Word8) -> Vector Word8 -> Vector Word8
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\c :: Word8
c -> if Word8
c Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> 3 then Word8
c else Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
xor Word8
c 2)
    revcom_pairs :: Vector NPair -> Vector NPair
revcom_pairs = Vector NPair -> Vector NPair
forall a. Storable a => Vector a -> Vector a
VS.reverse (Vector NPair -> Vector NPair)
-> (Vector NPair -> Vector NPair) -> Vector NPair -> Vector NPair
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (NPair -> NPair) -> Vector NPair -> Vector NPair
forall a b.
(Storable a, Storable b) =>
(a -> b) -> Vector a -> Vector b
VS.map (\p :: NPair
p -> Nucleotides -> Nucleotides -> NPair
npair (Nucleotides -> Nucleotides
compls (Nucleotides -> Nucleotides) -> Nucleotides -> Nucleotides
forall a b. (a -> b) -> a -> b
$ NPair -> Nucleotides
fst_np NPair
p) (Nucleotides -> Nucleotides
compls (Nucleotides -> Nucleotides) -> Nucleotides -> Nucleotides
forall a b. (a -> b) -> a -> b
$ NPair -> Nucleotides
snd_np NPair
p))


-- | Reconstructs the alignment from reference, query, and cigar.  Only
-- positions where the query is not gapped are produced.
aln_from_ref :: U.Vector Word8 -> Vector_Nucs_half Nucleotides -> VS.Vector Cigar -> VS.Vector NPair
aln_from_ref :: Vector Word8
-> Vector_Nucs_half Nucleotides -> Vector Cigar -> Vector NPair
aln_from_ref ref0 :: Vector Word8
ref0 qry0 :: Vector_Nucs_half Nucleotides
qry0 cig0 :: Vector Cigar
cig0 = [NPair] -> Vector NPair
forall a. Storable a => [a] -> Vector a
VS.fromList ([NPair] -> Vector NPair) -> [NPair] -> Vector NPair
forall a b. (a -> b) -> a -> b
$ Vector Word8
-> Vector_Nucs_half Nucleotides -> Vector Cigar -> [NPair]
forall a (v :: * -> *) (v :: * -> *).
(Unbox a, Eq a, Num a, Vector v Nucleotides, Vector v Cigar) =>
Vector a -> v Nucleotides -> v Cigar -> [NPair]
step Vector Word8
ref0 Vector_Nucs_half Nucleotides
qry0 Vector Cigar
cig0
  where
    step :: Vector a -> v Nucleotides -> v Cigar -> [NPair]
step ref :: Vector a
ref qry :: v Nucleotides
qry cig1 :: v Cigar
cig1
        | Vector a -> Bool
forall a. Unbox a => Vector a -> Bool
U.null Vector a
ref Bool -> Bool -> Bool
|| v Nucleotides -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Nucleotides
qry Bool -> Bool -> Bool
|| v Cigar -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Cigar
cig1 = []
        | Bool
otherwise = case v Cigar -> Cigar
forall (v :: * -> *) a. Vector v a => v a -> a
G.unsafeHead v Cigar
cig1 of { op :: CigOp
op :* n :: Int
n ->
                      case v Cigar -> v Cigar
forall (v :: * -> *) a. Vector v a => v a -> v a
G.unsafeTail v Cigar
cig1 of { cig :: v Cigar
cig ->
                      case CigOp
op of {

        Mat -> (a -> Nucleotides -> NPair) -> [a] -> [Nucleotides] -> [NPair]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (Nucleotides -> Nucleotides -> NPair
npair (Nucleotides -> Nucleotides -> NPair)
-> (a -> Nucleotides) -> a -> Nucleotides -> NPair
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. a -> Nucleotides
forall a. (Eq a, Num a) => a -> Nucleotides
nn) (Vector a -> [a]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> Vector a -> Vector a
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n Vector a
ref))
                                    (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n v Nucleotides
qry)) [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ Vector a -> v Nucleotides -> v Cigar -> [NPair]
step (Int -> Vector a -> Vector a
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n Vector a
ref) (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n v Nucleotides
qry) v Cigar
cig ;
        Del ->                                                   Vector a -> v Nucleotides -> v Cigar -> [NPair]
step (Int -> Vector a -> Vector a
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n Vector a
ref)           v Nucleotides
qry  v Cigar
cig ;
        Ins -> (Nucleotides -> NPair) -> [Nucleotides] -> [NPair]
forall a b. (a -> b) -> [a] -> [b]
map (Nucleotides -> Nucleotides -> NPair
npair Nucleotides
gap) (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n v Nucleotides
qry))      [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ Vector a -> v Nucleotides -> v Cigar -> [NPair]
step           Vector a
ref  (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n v Nucleotides
qry) v Cigar
cig ;
        SMa -> (Nucleotides -> NPair) -> [Nucleotides] -> [NPair]
forall a b. (a -> b) -> [a] -> [b]
map (Nucleotides -> Nucleotides -> NPair
npair Nucleotides
gap) (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n v Nucleotides
qry))      [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ Vector a -> v Nucleotides -> v Cigar -> [NPair]
step           Vector a
ref  (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n v Nucleotides
qry) v Cigar
cig ;
        HMa -> Int -> NPair -> [NPair]
forall a. Int -> a -> [a]
replicate Int
n (Nucleotides -> Nucleotides -> NPair
npair Nucleotides
gap Nucleotides
nucsN)                  [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ Vector a -> v Nucleotides -> v Cigar -> [NPair]
step           Vector a
ref            v Nucleotides
qry  v Cigar
cig ;
        Nop ->                                                   Vector a -> v Nucleotides -> v Cigar -> [NPair]
step           Vector a
ref            v Nucleotides
qry  v Cigar
cig ;
        Pad ->                                                   Vector a -> v Nucleotides -> v Cigar -> [NPair]
step           Vector a
ref            v Nucleotides
qry  v Cigar
cig }}}

    nn :: a -> Nucleotides
nn 0 = Nucleotides
nucsT
    nn 1 = Nucleotides
nucsC
    nn 2 = Nucleotides
nucsA
    nn 3 = Nucleotides
nucsG
    nn _ = Nucleotides
nucsN


-- | Reconstructs the alignment from query, cigar, and md.  Only
-- positions where the query is not gapped are produced.
alnFromMd :: Vector_Nucs_half Nucleotides -> VS.Vector Cigar -> [MdOp] -> VS.Vector NPair
alnFromMd :: Vector_Nucs_half Nucleotides
-> Vector Cigar -> [MdOp] -> Vector NPair
alnFromMd qry0 :: Vector_Nucs_half Nucleotides
qry0 cig0 :: Vector Cigar
cig0 md0 :: [MdOp]
md0 = [NPair] -> Vector NPair
forall a. Storable a => [a] -> Vector a
VS.fromList ([NPair] -> Vector NPair) -> [NPair] -> Vector NPair
forall a b. (a -> b) -> a -> b
$ Vector_Nucs_half Nucleotides -> Vector Cigar -> [MdOp] -> [NPair]
forall (v :: * -> *) (v :: * -> *).
(Vector v Nucleotides, Vector v Cigar) =>
v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step Vector_Nucs_half Nucleotides
qry0 Vector Cigar
cig0 [MdOp]
md0
  where
    step :: v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step qry :: v Nucleotides
qry cig1 :: v Cigar
cig1 md :: [MdOp]
md
        | v Nucleotides -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Nucleotides
qry Bool -> Bool -> Bool
|| v Cigar -> Bool
forall (v :: * -> *) a. Vector v a => v a -> Bool
G.null v Cigar
cig1 Bool -> Bool -> Bool
|| [MdOp] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [MdOp]
md = []
        | Bool
otherwise = case v Cigar -> Cigar
forall (v :: * -> *) a. Vector v a => v a -> a
G.unsafeHead v Cigar
cig1 of op :: CigOp
op :* n :: Int
n -> v Nucleotides -> CigOp -> Int -> v Cigar -> [MdOp] -> [NPair]
step' v Nucleotides
qry CigOp
op Int
n (v Cigar -> v Cigar
forall (v :: * -> *) a. Vector v a => v a -> v a
G.unsafeTail v Cigar
cig1) [MdOp]
md

    step' :: v Nucleotides -> CigOp -> Int -> v Cigar -> [MdOp] -> [NPair]
step' qry :: v Nucleotides
qry  _ 0 cig :: v Cigar
cig             md :: [MdOp]
md  = v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step  v Nucleotides
qry      v Cigar
cig [MdOp]
md
    step' qry :: v Nucleotides
qry op :: CigOp
op n :: Int
n cig :: v Cigar
cig (MdNum  0 : md :: [MdOp]
md) = v Nucleotides -> CigOp -> Int -> v Cigar -> [MdOp] -> [NPair]
step' v Nucleotides
qry CigOp
op Int
n v Cigar
cig [MdOp]
md
    step' qry :: v Nucleotides
qry op :: CigOp
op n :: Int
n cig :: v Cigar
cig (MdDel [] : md :: [MdOp]
md) = v Nucleotides -> CigOp -> Int -> v Cigar -> [MdOp] -> [NPair]
step' v Nucleotides
qry CigOp
op Int
n v Cigar
cig [MdOp]
md

    step' qry :: v Nucleotides
qry Mat n :: Int
n cig :: v Cigar
cig (MdNum m :: Int
m : md :: [MdOp]
md)
            | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<  Int
m =    (Nucleotides -> NPair) -> [Nucleotides] -> [NPair]
forall a b. (a -> b) -> [a] -> [b]
map Nucleotides -> NPair
twin (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n v Nucleotides
qry)) [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step  (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n v Nucleotides
qry)           v Cigar
cig (Int -> MdOp
MdNum (Int
mInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
n) MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
md)
            | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>  Int
m =    (Nucleotides -> NPair) -> [Nucleotides] -> [NPair]
forall a b. (a -> b) -> [a] -> [b]
map Nucleotides -> NPair
twin (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
m v Nucleotides
qry)) [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ v Nucleotides -> CigOp -> Int -> v Cigar -> [MdOp] -> [NPair]
step' (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
m v Nucleotides
qry) CigOp
Mat (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m) v Cigar
cig                [MdOp]
md
            | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
m =    (Nucleotides -> NPair) -> [Nucleotides] -> [NPair]
forall a b. (a -> b) -> [a] -> [b]
map Nucleotides -> NPair
twin (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n v Nucleotides
qry)) [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step  (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n v Nucleotides
qry)           v Cigar
cig                [MdOp]
md
    step' qry :: v Nucleotides
qry Mat n :: Int
n cig :: v Cigar
cig (MdRep c :: Nucleotides
c : md :: [MdOp]
md) = Nucleotides -> Nucleotides -> NPair
npair Nucleotides
c (v Nucleotides -> Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> a
G.head v Nucleotides
qry) NPair -> [NPair] -> [NPair]
forall a. a -> [a] -> [a]
: v Nucleotides -> CigOp -> Int -> v Cigar -> [MdOp] -> [NPair]
step' (v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => v a -> v a
G.tail   v Nucleotides
qry) CigOp
Mat (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) v Cigar
cig                [MdOp]
md
    step'   _ Mat _   _          _     = []

    step' qry :: v Nucleotides
qry Del n :: Int
n cig :: v Cigar
cig (MdDel (_:ss :: [Nucleotides]
ss) : md :: [MdOp]
md) = v Nucleotides -> CigOp -> Int -> v Cigar -> [MdOp] -> [NPair]
step' v Nucleotides
qry CigOp
Del (Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-1) v Cigar
cig ([Nucleotides] -> MdOp
MdDel [Nucleotides]
ss MdOp -> [MdOp] -> [MdOp]
forall a. a -> [a] -> [a]
: [MdOp]
md)
    step'   _ Del _   _               _     = []

    step' qry :: v Nucleotides
qry Ins n :: Int
n cig :: v Cigar
cig                 md :: [MdOp]
md  = (Nucleotides -> NPair) -> [Nucleotides] -> [NPair]
forall a b. (a -> b) -> [a] -> [b]
map (Nucleotides -> Nucleotides -> NPair
npair Nucleotides
gap) (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n v Nucleotides
qry)) [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n v Nucleotides
qry) v Cigar
cig [MdOp]
md
    step' qry :: v Nucleotides
qry SMa n :: Int
n cig :: v Cigar
cig                 md :: [MdOp]
md  = (Nucleotides -> NPair) -> [Nucleotides] -> [NPair]
forall a b. (a -> b) -> [a] -> [b]
map (Nucleotides -> Nucleotides -> NPair
npair Nucleotides
gap) (v Nucleotides -> [Nucleotides]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.take Int
n v Nucleotides
qry)) [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step (Int -> v Nucleotides -> v Nucleotides
forall (v :: * -> *) a. Vector v a => Int -> v a -> v a
G.drop Int
n v Nucleotides
qry) v Cigar
cig [MdOp]
md
    step' qry :: v Nucleotides
qry HMa n :: Int
n cig :: v Cigar
cig                 md :: [MdOp]
md  =             Int -> NPair -> [NPair]
forall a. Int -> a -> [a]
replicate Int
n (Nucleotides -> Nucleotides -> NPair
npair Nucleotides
gap Nucleotides
nucsN) [NPair] -> [NPair] -> [NPair]
forall a. [a] -> [a] -> [a]
++ v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step           v Nucleotides
qry  v Cigar
cig [MdOp]
md
    step' qry :: v Nucleotides
qry Nop _ cig :: v Cigar
cig                 md :: [MdOp]
md  =                                              v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step           v Nucleotides
qry  v Cigar
cig [MdOp]
md
    step' qry :: v Nucleotides
qry Pad _ cig :: v Cigar
cig                 md :: [MdOp]
md  =                                              v Nucleotides -> v Cigar -> [MdOp] -> [NPair]
step           v Nucleotides
qry  v Cigar
cig [MdOp]
md

    twin :: Nucleotides -> NPair
twin q :: Nucleotides
q = Nucleotides -> Nucleotides -> NPair
npair Nucleotides
q Nucleotides
q

-- | Number of mismatches allowed by BWA.
-- @bwa_cal_maxdiff thresh len@ returns the number of mismatches
-- @bwa aln -n $tresh@ would allow in a read of length @len@.  For
-- reference, here is the code from BWA that computes it (we assume @err
-- = 0.02@, just like BWA):
--
-- @
-- int bwa_cal_maxdiff(int l, double err, double thres)
--   {
--      double elambda = exp(-l * err);
--      double sum, y = 1.0;
--      int k, x = 1;
--      for (k = 1, sum = elambda; k < 1000; ++k) {
--          y *= l * err;
--          x *= k;
--          sum += elambda * y / x;
--          if (1.0 - sum < thres) return k;
--      }
--      return 2;
--   }
-- @

bwa_cal_maxdiff :: Double -> Int -> Int
bwa_cal_maxdiff :: Double -> Int -> Int
bwa_cal_maxdiff thresh :: Double
thresh len :: Int
len = Int
k_finInt -> Int -> Int
forall a. Num a => a -> a -> a
-1
  where
    (k_fin :: Int
k_fin, _, _, _) = [(Int, Double, Double, Double)] -> (Int, Double, Double, Double)
forall a. [a] -> a
head ([(Int, Double, Double, Double)] -> (Int, Double, Double, Double))
-> [(Int, Double, Double, Double)] -> (Int, Double, Double, Double)
forall a b. (a -> b) -> a -> b
$ ((Int, Double, Double, Double) -> Bool)
-> [(Int, Double, Double, Double)]
-> [(Int, Double, Double, Double)]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile (Int, Double, Double, Double) -> Bool
forall a c d. (a, Double, c, d) -> Bool
bad ([(Int, Double, Double, Double)]
 -> [(Int, Double, Double, Double)])
-> [(Int, Double, Double, Double)]
-> [(Int, Double, Double, Double)]
forall a b. (a -> b) -> a -> b
$ ((Int, Double, Double, Double) -> (Int, Double, Double, Double))
-> (Int, Double, Double, Double) -> [(Int, Double, Double, Double)]
forall a. (a -> a) -> a -> [a]
iterate (Int, Double, Double, Double) -> (Int, Double, Double, Double)
forall a.
Integral a =>
(a, Double, Double, Double) -> (a, Double, Double, Double)
step (1,Double
elambda,1,1)

    err :: Double
err = 0.02
    elambda :: Double
elambda = Double -> Double
forall a. Floating a => a -> a
exp (Double -> Double) -> (Double -> Double) -> Double -> Double
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Double -> Double
forall a. Num a => a -> a
negate (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
err

    step :: (a, Double, Double, Double) -> (a, Double, Double, Double)
step (k :: a
k, s :: Double
s, x :: Double
x, y :: Double
y) = (a
ka -> a -> a
forall a. Num a => a -> a -> a
+1, Double
s', Double
x', Double
y')
      where y' :: Double
y' = Double
y Double -> Double -> Double
forall a. Num a => a -> a -> a
* Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
err
            x' :: Double
x' = Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
* a -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
k
            s' :: Double
s' = Double
s Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
elambda Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
y' Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
x'

    bad :: (a, Double, c, d) -> Bool
bad (_, s :: Double
s, _, _) = 1Double -> Double -> Double
forall a. Num a => a -> a -> a
-Double
s Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
thresh