{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
module Bio.Data.Bam
    ( BAM
    , getBamHeader
    , streamBam
    , sinkBam
    , bamToBedC
    , bamToBed
    , bamToFragmentC
    , bamToFragment
    , bamToFastqC
    , bamToFastq
    , sortedBamToBedPE
    ) where

import Control.Monad (mzero)
import Data.List (foldl')
import           Bio.Data.Bed
import           Bio.Data.Bed.Types
import           Bio.Data.Fastq
import           Bio.HTS
import           Conduit

-- | Convert bam record to bed record. Unmapped reads will be discarded.
bamToBedC :: MonadIO m => BAMHeader -> ConduitT BAM BED m ()
bamToBedC :: BAMHeader -> ConduitT BAM BED m ()
bamToBedC BAMHeader
header = (BAM -> Maybe BED) -> ConduitT BAM (Maybe BED) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header) ConduitT BAM (Maybe BED) m ()
-> ConduitM (Maybe BED) BED m () -> ConduitT BAM BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe BED) BED m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
{-# INLINE bamToBedC #-}

-- | Convert bam record to fastq record.
bamToFastqC :: Monad m => ConduitT BAM Fastq m ()
bamToFastqC :: ConduitT BAM Fastq m ()
bamToFastqC = (BAM -> Maybe Fastq) -> ConduitT BAM (Maybe Fastq) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC BAM -> Maybe Fastq
bamToFastq ConduitT BAM (Maybe Fastq) m ()
-> ConduitM (Maybe Fastq) Fastq m () -> ConduitT BAM Fastq m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe Fastq) Fastq m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
{-# INLINE bamToFastqC #-}

-- | Convert pairedend bam to fragment. 
bamToFragmentC :: Monad m => BAMHeader -> ConduitT BAM BED m ()
bamToFragmentC :: BAMHeader -> ConduitT BAM BED m ()
bamToFragmentC BAMHeader
header = (BAM -> Maybe BED) -> ConduitT BAM (Maybe BED) m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (BAMHeader -> BAM -> Maybe BED
bamToFragment BAMHeader
header) ConduitT BAM (Maybe BED) m ()
-> ConduitM (Maybe BED) BED m () -> ConduitT BAM BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe BED) BED m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
{-# INLINE bamToFragmentC #-}

bamToFragment :: BAMHeader -> BAM -> Maybe BED
bamToFragment :: BAMHeader -> BAM -> Maybe BED
bamToFragment BAMHeader
header BAM
bam 
    | Bool -> Bool
not (Word16 -> Bool
isFirstSegment Word16
flg) = Maybe BED
forall a. Maybe a
Nothing
    | Bool
otherwise = do
        ByteString
chr1 <- BAMHeader -> BAM -> Maybe ByteString
refName BAMHeader
header BAM
bam
        ByteString
chr2 <- BAMHeader -> BAM -> Maybe ByteString
mateRefName BAMHeader
header BAM
bam
        if ByteString
chr1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString
chr2
            then BED -> Maybe BED
forall (m :: * -> *) a. Monad m => a -> m a
return (BED -> Maybe BED) -> BED -> Maybe BED
forall a b. (a -> b) -> a -> b
$ ByteString
-> Int -> Int -> Maybe ByteString -> Maybe Int -> Maybe Bool -> BED
BED ByteString
chr1 (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
start1 Int
start2) (Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
end1 Int
end2)
                (ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ BAM -> ByteString
queryName BAM
bam) Maybe Int
forall a. Maybe a
Nothing Maybe Bool
forall a. Maybe a
Nothing
            else Maybe BED
forall (m :: * -> *) a. MonadPlus m => m a
mzero
  where
    start1 :: Int
start1 = BAM -> Int
startLoc BAM
bam
    end1 :: Int
end1 = BAM -> Int
endLoc BAM
bam
    start2 :: Int
start2 = BAM -> Int
mateStartLoc BAM
bam
    end2 :: Int
end2 = BAM -> Int
mateStartLoc BAM
bam Int -> Int -> Int
forall a. Num a => a -> a -> a
+ CIGAR -> Int
ciglen CIGAR
cig
    cig :: CIGAR
cig = case (Char, Char) -> BAM -> Maybe AuxiliaryData
queryAuxData (Char
'M', Char
'C') BAM
bam of
        Just (AuxString ByteString
x) -> ByteString -> CIGAR
string2Cigar ByteString
x
        Maybe AuxiliaryData
_ -> [Char] -> CIGAR
forall a. HasCallStack => [Char] -> a
error [Char]
"No MC tag. Please run samtools fixmate on file first."
    ciglen :: CIGAR -> Int
ciglen (CIGAR [(Int, Char)]
c) = (Int -> (Int, Char) -> Int) -> Int -> [(Int, Char)] -> Int
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Int -> (Int, Char) -> Int
forall p. Num p => p -> (p, Char) -> p
f Int
0 [(Int, Char)]
c
        where f :: p -> (p, Char) -> p
f p
acc (p
n,Char
x) = if Char
x Char -> [Char] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
"MDN=X" then p
n p -> p -> p
forall a. Num a => a -> a -> a
+ p
acc else p
acc
    flg :: Word16
flg = BAM -> Word16
flag BAM
bam
{-# INLINE bamToFragment #-}

-- | Convert pairedend bam file to bed. the bam file must be sorted by names,
-- e.g., using "samtools sort -n". This condition is checked from Bam header.
sortedBamToBedPE :: Monad m => BAMHeader -> ConduitT BAM (BED, BED) m ()
sortedBamToBedPE :: BAMHeader -> ConduitT BAM (BED, BED) m ()
sortedBamToBedPE BAMHeader
header = case BAMHeader -> SortOrder
getSortOrder BAMHeader
header of
    SortOrder
Queryname -> ConduitT BAM (Maybe (BED, BED)) m ()
loopBedPE ConduitT BAM (Maybe (BED, BED)) m ()
-> ConduitM (Maybe (BED, BED)) (BED, BED) m ()
-> ConduitT BAM (BED, BED) m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ConduitM (Maybe (BED, BED)) (BED, BED) m ()
forall (m :: * -> *) mono.
(Monad m, MonoFoldable mono) =>
ConduitT mono (Element mono) m ()
concatC
    SortOrder
_         -> [Char] -> ConduitT BAM (BED, BED) m ()
forall a. HasCallStack => [Char] -> a
error [Char]
"Bam file must be sorted by NAME."
  where
    loopBedPE :: ConduitT BAM (Maybe (BED, BED)) m ()
loopBedPE = (,) (BAM -> BAM -> (BAM, BAM))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM -> (BAM, BAM)))
forall a b.
(a -> b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
<$$> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM -> (BAM, BAM)))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM, BAM))
forall a b.
ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
<***> ConduitT BAM (Maybe (BED, BED)) m (Maybe BAM)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await ConduitT BAM (Maybe (BED, BED)) m (Maybe (BAM, BAM))
-> (Maybe (BAM, BAM) -> ConduitT BAM (Maybe (BED, BED)) m ())
-> ConduitT BAM (Maybe (BED, BED)) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
        Maybe (BAM, BAM)
Nothing -> () -> ConduitT BAM (Maybe (BED, BED)) m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        Just (BAM
bam1, BAM
bam2) -> if BAM -> ByteString
queryName BAM
bam1 ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= BAM -> ByteString
queryName BAM
bam2
            then [Char] -> ConduitT BAM (Maybe (BED, BED)) m ()
forall a. HasCallStack => [Char] -> a
error [Char]
"Adjacent records have different query names. Aborted."
            else do
                Maybe (BED, BED) -> ConduitT BAM (Maybe (BED, BED)) m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Maybe (BED, BED) -> ConduitT BAM (Maybe (BED, BED)) m ())
-> Maybe (BED, BED) -> ConduitT BAM (Maybe (BED, BED)) m ()
forall a b. (a -> b) -> a -> b
$ (,) (BED -> BED -> (BED, BED))
-> Maybe BED -> Maybe (BED -> (BED, BED))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header BAM
bam1 Maybe (BED -> (BED, BED)) -> Maybe BED -> Maybe (BED, BED)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header BAM
bam2
                ConduitT BAM (Maybe (BED, BED)) m ()
loopBedPE
      where
        <$$> :: (a -> b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
(<$$>) = (Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((Maybe a -> Maybe b)
 -> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
 -> ConduitT BAM (Maybe (BED, BED)) m (Maybe b))
-> ((a -> b) -> Maybe a -> Maybe b)
-> (a -> b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap
        <***> :: ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
(<***>) = ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
(<*>) (ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b)
 -> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
 -> ConduitT BAM (Maybe (BED, BED)) m (Maybe b))
-> (ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
    -> ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Maybe (a -> b) -> Maybe a -> Maybe b)
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe (a -> b))
-> ConduitT BAM (Maybe (BED, BED)) m (Maybe a -> Maybe b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
(<*>)
{-# INLINE sortedBamToBedPE #-}

-- | Convert BAM to BED.
bamToBed :: BAMHeader -> BAM -> Maybe BED
bamToBed :: BAMHeader -> BAM -> Maybe BED
bamToBed BAMHeader
header BAM
bam = ByteString -> BED
mkBed (ByteString -> BED) -> Maybe ByteString -> Maybe BED
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BAMHeader -> BAM -> Maybe ByteString
refName BAMHeader
header BAM
bam 
  where
    mkBed :: ByteString -> BED
mkBed ByteString
chr = ByteString
-> Int -> Int -> Maybe ByteString -> Maybe Int -> Maybe Bool -> BED
BED ByteString
chr Int
start Int
end Maybe ByteString
nm Maybe Int
sc Maybe Bool
str
    start :: Int
start = BAM -> Int
startLoc BAM
bam
    end :: Int
end = BAM -> Int
endLoc BAM
bam
    nm :: Maybe ByteString
nm = ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ BAM -> ByteString
queryName BAM
bam
    str :: Maybe Bool
str = Bool -> Maybe Bool
forall a. a -> Maybe a
Just (Bool -> Maybe Bool) -> Bool -> Maybe Bool
forall a b. (a -> b) -> a -> b
$ Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ BAM -> Bool
isRev BAM
bam
    sc :: Maybe Int
sc = Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int) -> Int -> Maybe Int
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ BAM -> Word8
mapq BAM
bam
{-# INLINE bamToBed #-}

-- | Convert BAM to Fastq.
bamToFastq :: BAM -> Maybe Fastq
bamToFastq :: BAM -> Maybe Fastq
bamToFastq BAM
bam = ByteString -> ByteString -> ByteString -> Fastq
Fastq (BAM -> ByteString
queryName BAM
bam) (ByteString -> ByteString -> Fastq)
-> Maybe ByteString -> Maybe (ByteString -> Fastq)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> BAM -> Maybe ByteString
getSeq BAM
bam Maybe (ByteString -> Fastq) -> Maybe ByteString -> Maybe Fastq
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> BAM -> Maybe ByteString
qualityS BAM
bam
{-# INLINE bamToFastq #-}