module Bio.Bam.Evan
( fixupFlagAbuse
, fixupBwaFlags
, removeWarts
) where
import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Prelude
import qualified Data.ByteString.Char8 as S
fixupFlagAbuse :: BamRec -> BamRec
fixupFlagAbuse :: BamRec -> BamRec
fixupFlagAbuse b :: BamRec
b =
(if BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
flag_low_quality Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= 0 then Char -> BamRec -> BamRec
setQualFlag 'Q' else BamRec -> BamRec
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id) (BamRec -> BamRec) -> BamRec -> BamRec
forall a b. (a -> b) -> a -> b
$
(if BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
flag_low_complexity Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= 0 then Char -> BamRec -> BamRec
setQualFlag 'C' else BamRec -> BamRec
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id) (BamRec -> BamRec) -> BamRec -> BamRec
forall a b. (a -> b) -> a -> b
$
BamRec
b { b_flag :: Int
b_flag = Int
cleaned_flags, b_exts :: Extensions
b_exts = Extensions
cleaned_exts }
where
flags' :: Int
flags' = BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int -> Int
forall a. Bits a => a -> a
complement (Int
flag_low_quality Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flag_low_complexity)
cleaned_flags :: Int
cleaned_flags | Int
flags' Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
flagPaired Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0 = Int
flags' Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int -> Int
forall a. Bits a => a -> a
complement (Int
flagFirstMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagSecondMate)
| Bool
otherwise = Int
flags'
flag_low_quality :: Int
flag_low_quality = 0x800
flag_low_complexity :: Int
flag_low_complexity = 0x1000
is_merged :: Bool
is_merged = Int
flags' Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. (Int
flagPaired Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFirstMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagSecondMate) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
flagFirstMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagSecondMate
is_trimmed :: Bool
is_trimmed = Int
flags' Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. (Int
flagPaired Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFirstMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagSecondMate) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
flagSecondMate
newflags :: Int
newflags = (if Bool
is_merged then Int
eflagMerged else 0) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (if Bool
is_trimmed then Int
eflagTrimmed else 0)
cleaned_exts :: Extensions
cleaned_exts = case (BamKey -> Extensions -> Maybe Ext
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "FF" (BamRec -> Extensions
b_exts BamRec
b), BamKey -> Extensions -> Maybe Ext
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup "XF" (BamRec -> Extensions
b_exts BamRec
b)) of
( Just (Int i :: Int
i), _ ) -> BamKey -> Ext -> Extensions -> Extensions
updateE "FF" (Int -> Ext
Int (Int
i Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
newflags)) (BamRec -> Extensions
b_exts BamRec
b)
( _, Just (Int i :: Int
i) ) -> BamKey -> Ext -> Extensions -> Extensions
updateE "FF" (Int -> Ext
Int (Int
i Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
newflags)) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamKey -> Extensions -> Extensions
deleteE "XF" (BamRec -> Extensions
b_exts BamRec
b)
_ | Int
newflags Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= 0 -> BamKey -> Ext -> Extensions -> Extensions
updateE "FF" (Int -> Ext
Int Int
newflags ) (BamRec -> Extensions
b_exts BamRec
b)
| Bool
otherwise -> BamRec -> Extensions
b_exts BamRec
b
fixupBwaFlags :: BamRec -> BamRec
fixupBwaFlags :: BamRec -> BamRec
fixupBwaFlags b :: BamRec
b = BamRec
b { b_flag :: Int
b_flag = Int -> Int
fixPP (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ BamRec -> Int
b_flag BamRec
b Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. if Bool
mu then Int
flagMateUnmapped else 0 }
where
mu :: Bool
mu = [Bool] -> Bool
forall (t :: * -> *). Foldable t => t Bool -> Bool
and [ BamRec -> Bool
isPaired BamRec
b, Bool -> Bool
not (BamRec -> Bool
isUnmapped BamRec
b)
, BamRec -> Bool
isReversed BamRec
b Bool -> Bool -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> Bool
isMateReversed BamRec
b
, BamRec -> Refseq
b_rname BamRec
b Refseq -> Refseq -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> Refseq
b_mrnm BamRec
b, BamRec -> Int
b_pos BamRec
b Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BamRec -> Int
b_mpos BamRec
b ]
fixPP :: Int -> Int
fixPP f :: Int
f | Int
f Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. (Int
flagUnmapped Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagMateUnmapped) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== 0 = Int
f
| Bool
otherwise = Int
f Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int -> Int
forall a. Bits a => a -> a
complement Int
flagProperlyPaired
removeWarts :: BamRec -> BamRec
removeWarts :: BamRec -> BamRec
removeWarts br :: BamRec
br = BamRec
br { b_qname :: Bytes
b_qname = Bytes
name, b_flag :: Int
b_flag = Int
flags, b_exts :: Extensions
b_exts = Extensions
tags }
where
(name :: Bytes
name, flags :: Int
flags, tags :: Extensions
tags) = (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
checkFR ((Bytes, Int, Extensions) -> (Bytes, Int, Extensions))
-> (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall a b. (a -> b) -> a -> b
$ (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall b. (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkC ((Bytes, Int, Extensions) -> (Bytes, Int, Extensions))
-> (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall a b. (a -> b) -> a -> b
$ (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall b. (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkSharp (BamRec -> Bytes
b_qname BamRec
br, BamRec -> Int
b_flag BamRec
br, BamRec -> Extensions
b_exts BamRec
br)
checkFR :: (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
checkFR (n :: Bytes
n,f :: Int
f,t :: Extensions
t) | "F_" Bytes -> Bytes -> Bool
`S.isPrefixOf` Bytes
n = (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall b. (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkC (Int -> Bytes -> Bytes
S.drop 2 Bytes
n, Int
f Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFirstMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagPaired, Extensions
t)
| "R_" Bytes -> Bytes -> Bool
`S.isPrefixOf` Bytes
n = (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall b. (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkC (Int -> Bytes -> Bytes
S.drop 2 Bytes
n, Int
f Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagSecondMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagPaired, Extensions
t)
| "M_" Bytes -> Bytes -> Bool
`S.isPrefixOf` Bytes
n = (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall b. (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkC (Int -> Bytes -> Bytes
S.drop 2 Bytes
n, Int
f, BamKey -> Ext -> Extensions -> Extensions
insertE "FF" (Int -> Ext
Int Int
eflagMerged) Extensions
t)
| "T_" Bytes -> Bytes -> Bool
`S.isPrefixOf` Bytes
n = (Bytes, Int, Extensions) -> (Bytes, Int, Extensions)
forall b. (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkC (Int -> Bytes -> Bytes
S.drop 2 Bytes
n, Int
f, BamKey -> Ext -> Extensions -> Extensions
insertE "FF" (Int -> Ext
Int Int
eflagTrimmed) Extensions
t)
| "/1" Bytes -> Bytes -> Bool
`S.isSuffixOf` Bytes
n = ( Int -> Bytes -> Bytes
rdrop 2 Bytes
n, Int
f Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagFirstMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagPaired, Extensions
t)
| "/2" Bytes -> Bytes -> Bool
`S.isSuffixOf` Bytes
n = ( Int -> Bytes -> Bytes
rdrop 2 Bytes
n, Int
f Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagSecondMate Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Int
flagPaired, Extensions
t)
| Bool
otherwise = ( Bytes
n, Int
f, Extensions
t)
checkC :: (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkC (n :: Bytes
n,f :: b
f,t :: Extensions
t) | "C_" Bytes -> Bytes -> Bool
`S.isPrefixOf` Bytes
n = (Int -> Bytes -> Bytes
S.drop 2 Bytes
n, b
f, BamKey -> Ext -> Extensions -> Extensions
insertE "XP" (Int -> Ext
Int (-1)) Extensions
t)
| Bool
otherwise = ( Bytes
n, b
f, Extensions
t)
rdrop :: Int -> Bytes -> Bytes
rdrop n :: Int
n s :: Bytes
s = Int -> Bytes -> Bytes
S.take (Bytes -> Int
S.length Bytes
s Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
n) Bytes
s
checkSharp :: (Bytes, b, Extensions) -> (Bytes, b, Extensions)
checkSharp (n :: Bytes
n,f :: b
f,t :: Extensions
t) = case Char -> Bytes -> [Bytes]
S.split '#' Bytes
n of [n' :: Bytes
n',ts :: Bytes
ts] -> (Bytes
n', b
f, Bytes -> Extensions -> Extensions
insertTags Bytes
ts Extensions
t)
_ -> ( Bytes
n, b
f, Extensions
t)
insertTags :: Bytes -> Extensions -> Extensions
insertTags ts :: Bytes
ts t :: Extensions
t | Bytes -> Bool
S.null Bytes
y = BamKey -> Ext -> Extensions -> Extensions
insertE "XI" (Bytes -> Ext
Text Bytes
ts) Extensions
t
| Bool
otherwise = BamKey -> Ext -> Extensions -> Extensions
insertE "XI" (Bytes -> Ext
Text Bytes
x) (Extensions -> Extensions) -> Extensions -> Extensions
forall a b. (a -> b) -> a -> b
$ BamKey -> Ext -> Extensions -> Extensions
insertE "XJ" (Bytes -> Ext
Text (Bytes -> Ext) -> Bytes -> Ext
forall a b. (a -> b) -> a -> b
$ Bytes -> Bytes
S.tail Bytes
y) Extensions
t
where (x :: Bytes
x,y :: Bytes
y) = (Char -> Bool) -> Bytes -> (Bytes, Bytes)
S.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ',') Bytes
ts