{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
module Bio.Data.Bam
( BAM
, getBamHeader
, streamBam
, sinkBam
, bamToBedC
, bamToBed
, bamToFragmentC
, bamToFragment
, bamToFastqC
, bamToFastq
, sortedBamToBedPE
) where
import Control.Monad (mzero)
import Data.List (foldl')
import Bio.Data.Bed
import Bio.Data.Fastq
import Bio.HTS
import Conduit
bamToBedC :: MonadIO m => BAMHeader -> ConduitT BAM BED m ()
bamToBedC :: BAMHeader -> ConduitT BAM BED m ()
bamToBedC BAMHeader
header = (BAM -> Maybe BED) -> ConduitT BAM (Maybe BED) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header) ConduitT BAM (Maybe BED) m ()
-> ConduitM (Maybe BED) BED m () -> ConduitT BAM BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe BED) BED m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
{-# INLINE bamToBedC #-}
bamToFastqC :: Monad m => ConduitT BAM Fastq m ()
bamToFastqC :: ConduitT BAM Fastq m ()
bamToFastqC = (BAM -> Maybe Fastq) -> ConduitT BAM (Maybe Fastq) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC BAM -> Maybe Fastq
bamToFastq ConduitT BAM (Maybe Fastq) m ()
-> ConduitM (Maybe Fastq) Fastq m () -> ConduitT BAM Fastq m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe Fastq) Fastq m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
{-# INLINE bamToFastqC #-}
bamToFragmentC :: Monad m => BAMHeader -> ConduitT BAM BED m ()
bamToFragmentC :: BAMHeader -> ConduitT BAM BED m ()
bamToFragmentC BAMHeader
header = (BAM -> Maybe BED) -> ConduitT BAM (Maybe BED) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (BAMHeader -> BAM -> Maybe BED
bamToFragment BAMHeader
header) ConduitT BAM (Maybe BED) m ()
-> ConduitM (Maybe BED) BED m () -> ConduitT BAM BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe BED) BED m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
{-# INLINE bamToFragmentC #-}
bamToFragment :: BAMHeader -> BAM -> Maybe BED
bamToFragment :: BAMHeader -> BAM -> Maybe BED
bamToFragment BAMHeader
header BAM
bam
| Bool -> Bool
not (Word16 -> Bool
isFirstSegment Word16
flg) = Maybe BED
forall a. Maybe a
Nothing
| Bool
otherwise = do
ByteString
chr1 <- BAMHeader -> BAM -> Maybe ByteString
refName BAMHeader
header BAM
bam
ByteString
chr2 <- BAMHeader -> BAM -> Maybe ByteString
mateRefName BAMHeader
header BAM
bam
if ByteString
chr1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
chr2
then BED -> Maybe BED
forall (m :: * -> *) a. Monad m => a -> m a
return (BED -> Maybe BED) -> BED -> Maybe BED
forall a b. (a -> b) -> a -> b
$ ByteString
-> Int -> Int -> Maybe ByteString -> Maybe Int -> Maybe Bool -> BED
BED ByteString
chr1 (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
start1 Int
start2) (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
end1 Int
end2)
(ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ BAM -> ByteString
queryName BAM
bam) Maybe Int
forall a. Maybe a
Nothing Maybe Bool
forall a. Maybe a
Nothing
else Maybe BED
forall (m :: * -> *) a. MonadPlus m => m a
mzero
where
start1 :: Int
start1 = BAM -> Int
startLoc BAM
bam
end1 :: Int
end1 = BAM -> Int
endLoc BAM
bam
start2 :: Int
start2 = BAM -> Int
mateStartLoc BAM
bam
end2 :: Int
end2 = BAM -> Int
mateStartLoc BAM
bam Int -> Int -> Int
forall a. Num a => a -> a -> a
+ CIGAR -> Int
ciglen CIGAR
cig
cig :: CIGAR
cig = case (Char, Char) -> BAM -> Maybe AuxiliaryData
queryAuxData (Char
'M', Char
'C') BAM
bam of
Just (AuxString ByteString
x) -> ByteString -> CIGAR
string2Cigar ByteString
x
Maybe AuxiliaryData
_ -> [Char] -> CIGAR
forall a. HasCallStack => [Char] -> a
error [Char]
"No MC tag. Please run samtools fixmate on file first."
ciglen :: CIGAR -> Int
ciglen (CIGAR [(Int, Char)]
c) = (Int -> (Int, Char) -> Int) -> Int -> [(Int, Char)] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Int -> (Int, Char) -> Int
forall p. Num p => p -> (p, Char) -> p
f Int
0 [(Int, Char)]
c
where f :: p -> (p, Char) -> p
f p
acc (p
n,Char
x) = if Char
x Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
"MDN=X" then p
n p -> p -> p
forall a. Num a => a -> a -> a
+ p
acc else p
acc
flg :: Word16
flg = BAM -> Word16
flag BAM
bam
{-# INLINE bamToFragment #-}
sortedBamToBedPE :: Monad m => BAMHeader -> ConduitT BAM (BED, BED) m ()
sortedBamToBedPE :: BAMHeader -> ConduitT BAM (BED, BED) m ()
sortedBamToBedPE BAMHeader
header = case BAMHeader -> SortOrder
getSortOrder BAMHeader
header of
SortOrder
Queryname -> ConduitT BAM (Maybe (BED, BED)) m ()
loopBedPE ConduitT BAM (Maybe (BED, BED)) m ()
-> ConduitM (Maybe (BED, BED)) (BED, BED) m ()
-> ConduitT BAM (BED, BED) m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe (BED, BED)) (BED, BED) m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
SortOrder
_ -> [Char] -> ConduitT BAM (BED, BED) m ()
forall a. HasCallStack => [Char] -> a
error [Char]
"Bam file must be sorted by NAME."
where
loopBedPE :: ConduitT BAM (Maybe (BED, BED)) m ()
loopBedPE = (,) (BAM -> BAM -> (BAM, BAM))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM -> (BAM, BAM)))
forall a b.
(a -> b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
<$$> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM -> (BAM, BAM)))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM, BAM))
forall a b.
ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
<***> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM, BAM))
-> (Maybe (BAM, BAM) -> ConduitT BAM (Maybe (BED, BED)) m ())
-> ConduitT BAM (Maybe (BED, BED)) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Maybe (BAM, BAM)
Nothing -> () -> ConduitT BAM (Maybe (BED, BED)) m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just (BAM
bam1, BAM
bam2) -> if BAM -> ByteString
queryName BAM
bam1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= BAM -> ByteString
queryName BAM
bam2
then [Char] -> ConduitT BAM (Maybe (BED, BED)) m ()
forall a. HasCallStack => [Char] -> a
error [Char]
"Adjacent records have different query names. Aborted."
else do
Maybe (BED, BED) -> ConduitT BAM (Maybe (BED, BED)) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Maybe (BED, BED) -> ConduitT BAM (Maybe (BED, BED)) m ())
-> Maybe (BED, BED) -> ConduitT BAM (Maybe (BED, BED)) m ()
forall a b. (a -> b) -> a -> b
$ (,) (BED -> BED -> (BED, BED))
-> Maybe BED -> Maybe (BED -> (BED, BED))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header BAM
bam1 Maybe (BED -> (BED, BED)) -> Maybe BED -> Maybe (BED, BED)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header BAM
bam2
ConduitT BAM (Maybe (BED, BED)) m ()
loopBedPE
where
<$$> :: (a -> b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
(<$$>) = (Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b))
-> ((a -> b) -> Maybe a -> Maybe b)
-> (a -> b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap
<***> :: ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
(<***>) = ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
(<*>) (ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b))
-> (ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe (a -> b) -> Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
(<*>)
{-# INLINE sortedBamToBedPE #-}
bamToBed :: BAMHeader -> BAM -> Maybe BED
bamToBed :: BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header BAM
bam = ByteString -> BED
mkBed (ByteString -> BED) -> Maybe ByteString -> Maybe BED
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BAMHeader -> BAM -> Maybe ByteString
refName BAMHeader
header BAM
bam
where
mkBed :: ByteString -> BED
mkBed ByteString
chr = ByteString
-> Int -> Int -> Maybe ByteString -> Maybe Int -> Maybe Bool -> BED
BED ByteString
chr Int
start Int
end Maybe ByteString
nm Maybe Int
sc Maybe Bool
str
start :: Int
start = BAM -> Int
startLoc BAM
bam
end :: Int
end = BAM -> Int
endLoc BAM
bam
nm :: Maybe ByteString
nm = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ BAM -> ByteString
queryName BAM
bam
str :: Maybe Bool
str = Bool -> Maybe Bool
forall a. a -> Maybe a
Just (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ BAM -> Bool
isRev BAM
bam
sc :: Maybe Int
sc = Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int) -> Int -> Maybe Int
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ BAM -> Word8
mapq BAM
bam
{-# INLINE bamToBed #-}
bamToFastq :: BAM -> Maybe Fastq
bamToFastq :: BAM -> Maybe Fastq
bamToFastq BAM
bam = ByteString -> ByteString -> ByteString -> Fastq
Fastq (BAM -> ByteString
queryName BAM
bam) (ByteString -> ByteString -> Fastq)
-> Maybe ByteString -> Maybe (ByteString -> Fastq)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BAM -> Maybe ByteString
getSeq BAM
bam Maybe (ByteString -> Fastq) -> Maybe ByteString -> Maybe Fastq
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> BAM -> Maybe ByteString
qualityS BAM
bam
{-# INLINE bamToFastq #-}