module Bio.Bam.Filter (
filterPairs, QualFilter,
complexSimple, complexEntropy,
qualityAverage, qualityMinimum,
qualityFromOldIllumina, qualityFromNewIllumina
) where
import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Prelude
import Bio.Streaming
import qualified Data.Vector.Generic as V
filterPairs :: Monad m => (BamRec -> [BamRec])
-> (Maybe BamRec -> Maybe BamRec -> [BamRec])
-> Stream (Of BamRec) m r -> Stream (Of BamRec) m r
filterPairs :: (BamRec -> [BamRec])
-> (Maybe BamRec -> Maybe BamRec -> [BamRec])
-> Stream (Of BamRec) m r
-> Stream (Of BamRec) m r
filterPairs ps :: BamRec -> [BamRec]
ps pp :: Maybe BamRec -> Maybe BamRec -> [BamRec]
pp = Stream (Of BamRec) m r -> Stream (Of BamRec) m r
forall b. Stream (Of BamRec) m b -> Stream (Of BamRec) m b
step
where
step :: Stream (Of BamRec) m b -> Stream (Of BamRec) m b
step = m (Either b (Of BamRec (Stream (Of BamRec) m b)))
-> Stream
(Of BamRec) m (Either b (Of BamRec (Stream (Of BamRec) m b)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either b (Of BamRec (Stream (Of BamRec) m b)))
-> Stream
(Of BamRec) m (Either b (Of BamRec (Stream (Of BamRec) m b))))
-> (Stream (Of BamRec) m b
-> m (Either b (Of BamRec (Stream (Of BamRec) m b))))
-> Stream (Of BamRec) m b
-> Stream
(Of BamRec) m (Either b (Of BamRec (Stream (Of BamRec) m b)))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Stream (Of BamRec) m b
-> m (Either b (Of BamRec (Stream (Of BamRec) m b)))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect (Stream (Of BamRec) m b
-> Stream
(Of BamRec) m (Either b (Of BamRec (Stream (Of BamRec) m b))))
-> (Either b (Of BamRec (Stream (Of BamRec) m b))
-> Stream (Of BamRec) m b)
-> Stream (Of BamRec) m b
-> Stream (Of BamRec) m b
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (b -> Stream (Of BamRec) m b)
-> (Of BamRec (Stream (Of BamRec) m b) -> Stream (Of BamRec) m b)
-> Either b (Of BamRec (Stream (Of BamRec) m b))
-> Stream (Of BamRec) m b
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either b -> Stream (Of BamRec) m b
forall (f :: * -> *) a. Applicative f => a -> f a
pure Of BamRec (Stream (Of BamRec) m b) -> Stream (Of BamRec) m b
step'
step' :: Of BamRec (Stream (Of BamRec) m b) -> Stream (Of BamRec) m b
step' (b :: BamRec
b :> s :: Stream (Of BamRec) m b
s)
| BamRec -> Bool
isPaired BamRec
b = m (Either b (Of BamRec (Stream (Of BamRec) m b)))
-> Stream
(Of BamRec) m (Either b (Of BamRec (Stream (Of BamRec) m b)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Stream (Of BamRec) m b
-> m (Either b (Of BamRec (Stream (Of BamRec) m b)))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect Stream (Of BamRec) m b
s) Stream
(Of BamRec) m (Either b (Of BamRec (Stream (Of BamRec) m b)))
-> (Either b (Of BamRec (Stream (Of BamRec) m b))
-> Stream (Of BamRec) m b)
-> Stream (Of BamRec) m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= BamRec
-> Either b (Of BamRec (Stream (Of BamRec) m b))
-> Stream (Of BamRec) m b
step'' BamRec
b
| Bool
otherwise = [BamRec] -> Stream (Of BamRec) m ()
forall (m :: * -> *) (f :: * -> *) a.
(Monad m, Foldable f) =>
f a -> Stream (Of a) m ()
each (BamRec -> [BamRec]
ps BamRec
b) Stream (Of BamRec) m ()
-> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
step Stream (Of BamRec) m b
s
step'' :: BamRec
-> Either b (Of BamRec (Stream (Of BamRec) m b))
-> Stream (Of BamRec) m b
step'' b :: BamRec
b (Left r :: b
r) = [BamRec] -> Stream (Of BamRec) m ()
forall (m :: * -> *) (f :: * -> *) a.
(Monad m, Foldable f) =>
f a -> Stream (Of a) m ()
each (Maybe BamRec -> Maybe BamRec -> [BamRec]
pp (BamRec -> Maybe BamRec
forall a. a -> Maybe a
Just BamRec
b) Maybe BamRec
forall a. Maybe a
Nothing) Stream (Of BamRec) m ()
-> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> b -> Stream (Of BamRec) m b
forall (f :: * -> *) a. Applicative f => a -> f a
pure b
r
step'' b :: BamRec
b (Right (c :: BamRec
c :> s :: Stream (Of BamRec) m b
s))
| BamRec -> Refseq
b_rname BamRec
b Refseq -> Refseq -> Bool
forall a. Eq a => a -> a -> Bool
/= BamRec -> Refseq
b_rname BamRec
c Bool -> Bool -> Bool
|| Bool -> Bool
not (BamRec -> Bool
isPaired BamRec
c) =
let b' :: [BamRec]
b' = if BamRec -> Bool
isSecondMate BamRec
b then Maybe BamRec -> Maybe BamRec -> [BamRec]
pp Maybe BamRec
forall a. Maybe a
Nothing (BamRec -> Maybe BamRec
forall a. a -> Maybe a
Just BamRec
b) else Maybe BamRec -> Maybe BamRec -> [BamRec]
pp (BamRec -> Maybe BamRec
forall a. a -> Maybe a
Just BamRec
b) Maybe BamRec
forall a. Maybe a
Nothing
in [BamRec] -> Stream (Of BamRec) m ()
forall (m :: * -> *) (f :: * -> *) a.
(Monad m, Foldable f) =>
f a -> Stream (Of a) m ()
each [BamRec]
b' Stream (Of BamRec) m ()
-> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Of BamRec (Stream (Of BamRec) m b) -> Stream (Of BamRec) m b
step' (BamRec
c BamRec
-> Stream (Of BamRec) m b -> Of BamRec (Stream (Of BamRec) m b)
forall a b. a -> b -> Of a b
:> Stream (Of BamRec) m b
s)
| BamRec -> Bool
isFirstMate BamRec
c Bool -> Bool -> Bool
&& BamRec -> Bool
isSecondMate BamRec
b = BamRec
-> BamRec -> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
step''' BamRec
c BamRec
b Stream (Of BamRec) m b
s
| Bool
otherwise = BamRec
-> BamRec -> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
step''' BamRec
b BamRec
c Stream (Of BamRec) m b
s
step''' :: BamRec
-> BamRec -> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
step''' b :: BamRec
b c :: BamRec
c s :: Stream (Of BamRec) m b
s = [BamRec] -> Stream (Of BamRec) m ()
forall (m :: * -> *) (f :: * -> *) a.
(Monad m, Foldable f) =>
f a -> Stream (Of a) m ()
each (Maybe BamRec -> Maybe BamRec -> [BamRec]
pp (BamRec -> Maybe BamRec
forall a. a -> Maybe a
Just BamRec
b) (BamRec -> Maybe BamRec
forall a. a -> Maybe a
Just BamRec
c)) Stream (Of BamRec) m ()
-> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Stream (Of BamRec) m b -> Stream (Of BamRec) m b
step Stream (Of BamRec) m b
s
type QualFilter = BamRec -> BamRec
{-# INLINE count #-}
count :: (V.Vector v a, Eq a) => a -> v a -> Int
count :: a -> v a -> Int
count x :: a
x = (Int -> a -> Int) -> Int -> v a -> Int
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
V.foldl' (\acc :: Int
acc y :: a
y -> if a
x a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
y then Int
accInt -> Int -> Int
forall a. Num a => a -> a -> a
+1 else Int
acc) 0
{-# INLINE complexSimple #-}
complexSimple :: Double -> QualFilter
complexSimple :: Double -> QualFilter
complexSimple r :: Double
r b :: BamRec
b = if Bool
p then BamRec
b else BamRec
b'
where
b' :: BamRec
b' = Char -> QualFilter
setQualFlag 'C' QualFilter -> QualFilter
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_flag :: Int
b_flag = BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFailsQC }
p :: Bool
p = let counts :: [Int]
counts = [ Nucleotides -> Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. (Vector v a, Eq a) => a -> v a -> Int
count Nucleotides
x (Vector_Nucs_half Nucleotides -> Int)
-> Vector_Nucs_half Nucleotides -> Int
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b | Nucleotides
x <- [Nucleotides]
properBases ]
lim :: Int
lim = Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
floor (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
r Double -> Double -> Double
forall a. Num a => a -> a -> a
* Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral ([Int] -> Int
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Int]
counts)
in (Int -> Bool) -> [Int] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
lim) [Int]
counts
{-# INLINE complexEntropy #-}
complexEntropy :: Double -> QualFilter
complexEntropy :: Double -> QualFilter
complexEntropy r :: Double
r b :: BamRec
b = if Bool
p then BamRec
b else BamRec
b'
where
b' :: BamRec
b' = Char -> QualFilter
setQualFlag 'C' QualFilter -> QualFilter
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_flag :: Int
b_flag = BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFailsQC }
p :: Bool
p = Double
ent Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>= Double
r Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
total
counts :: [Int]
counts = [ Nucleotides -> Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. (Vector v a, Eq a) => a -> v a -> Int
count Nucleotides
x (Vector_Nucs_half Nucleotides -> Int)
-> Vector_Nucs_half Nucleotides -> Int
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b | Nucleotides
x <- [Nucleotides]
properBases ]
total :: Double
total = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (Vector_Nucs_half Nucleotides -> Int)
-> Vector_Nucs_half Nucleotides -> Int
forall a b. (a -> b) -> a -> b
$ BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b
ent :: Double
ent = [Double] -> Double
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
c Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
forall a. Floating a => a -> a
log (Double
total Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
c) | Int
c <- [Int]
counts, Int
c Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= 0 ] Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double
forall a. Floating a => a -> a
log 2
{-# INLINE qualityAverage #-}
qualityAverage :: Int -> QualFilter
qualityAverage :: Int -> QualFilter
qualityAverage q :: Int
q b :: BamRec
b = if Bool
p then BamRec
b else BamRec
b'
where
b' :: BamRec
b' = Char -> QualFilter
setQualFlag 'Q' QualFilter -> QualFilter
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_flag :: Int
b_flag = BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFailsQC }
p :: Bool
p = case BamRec -> Maybe (Vector Qual)
b_qual BamRec
b of Nothing -> Bool
True
Just qs :: Vector Qual
qs -> let total :: Int
total = (Int -> Qual -> Int) -> Int -> Vector Qual -> Int
forall (v :: * -> *) b a.
Vector v b =>
(a -> b -> a) -> a -> v b -> a
V.foldl' (\a :: Int
a x :: Qual
x -> Int
a Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Qual -> Word8
unQ Qual
x)) 0 Vector Qual
qs
in Int
total Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
q Int -> Int -> Int
forall a. Num a => a -> a -> a
* Vector Qual -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length Vector Qual
qs
{-# INLINE qualityMinimum #-}
qualityMinimum :: Int -> Qual -> QualFilter
qualityMinimum :: Int -> Qual -> QualFilter
qualityMinimum n :: Int
n (Q q :: Word8
q) b :: BamRec
b = if Bool
p then BamRec
b else BamRec
b'
where
b' :: BamRec
b' = Char -> QualFilter
setQualFlag 'Q' QualFilter -> QualFilter
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_flag :: Int
b_flag = BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFailsQC }
p :: Bool
p = case BamRec -> Maybe (Vector Qual)
b_qual BamRec
b of Nothing -> Bool
True
Just qs :: Vector Qual
qs -> Vector Qual -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length ((Qual -> Bool) -> Vector Qual -> Vector Qual
forall (v :: * -> *) a. Vector v a => (a -> Bool) -> v a -> v a
V.filter (Qual -> Qual -> Bool
forall a. Ord a => a -> a -> Bool
< Word8 -> Qual
Q Word8
q) Vector Qual
qs) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
n
qualityFromOldIllumina :: BamRec -> BamRec
qualityFromOldIllumina :: QualFilter
qualityFromOldIllumina b :: BamRec
b = BamRec
b { b_qual :: Maybe (Vector Qual)
b_qual = (Qual -> Qual) -> Vector Qual -> Vector Qual
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
V.map Qual -> Qual
conv (Vector Qual -> Vector Qual)
-> Maybe (Vector Qual) -> Maybe (Vector Qual)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BamRec -> Maybe (Vector Qual)
b_qual BamRec
b }
where
conv :: Qual -> Qual
conv (Q s :: Word8
s) = let s' :: Double
s' :: Double
s' = Double -> Double
forall a. Floating a => a -> a
exp (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double -> Double
forall a. Floating a => a -> a
log 10 Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Word8 -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
s Double -> Double -> Double
forall a. Num a => a -> a -> a
- 31) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (-10)
p :: Double
p = Double
s' Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (1Double -> Double -> Double
forall a. Num a => a -> a -> a
+Double
s')
q :: Double
q = - 10 Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
forall a. Floating a => a -> a
log Double
p Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double -> Double
forall a. Floating a => a -> a
log 10
in Word8 -> Qual
Q (Double -> Word8
forall a b. (RealFrac a, Integral b) => a -> b
round Double
q)
qualityFromNewIllumina :: BamRec -> BamRec
qualityFromNewIllumina :: QualFilter
qualityFromNewIllumina b :: BamRec
b = BamRec
b { b_qual :: Maybe (Vector Qual)
b_qual = (Qual -> Qual) -> Vector Qual -> Vector Qual
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
V.map (Word8 -> Qual
Q (Word8 -> Qual) -> (Qual -> Word8) -> Qual -> Qual
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
subtract 31 (Word8 -> Word8) -> (Qual -> Word8) -> Qual -> Word8
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Qual -> Word8
unQ) (Vector Qual -> Vector Qual)
-> Maybe (Vector Qual) -> Maybe (Vector Qual)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BamRec -> Maybe (Vector Qual)
b_qual BamRec
b }