{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE LambdaCase #-}
module Bio.Data.Bam
    ( BAM
    , getBamHeader
    , streamBam
    , sinkBam
    , bamToBedC
    , bamToBed
    , bamToFragmentC
    , bamToFragment
    , bamToFastqC
    , bamToFastq
    , sortedBamToBedPE
    ) where

import Control.Monad (mzero)
import Data.List (foldl')
import           Bio.Data.Bed
import           Bio.Data.Bed.Types
import           Bio.Data.Fastq
import           Bio.HTS
import           Bio.HTS.Types        (BAM, BAMHeader, SortOrder(..))
import           Conduit

-- | Convert bam record to bed record. Unmapped reads will be discarded.
bamToBedC :: MonadIO m => BAMHeader -> ConduitT BAM BED m ()
bamToBedC header = mapC (bamToBed header) .| concatC
{-# INLINE bamToBedC #-}

-- | Convert bam record to fastq record.
bamToFastqC :: Monad m => ConduitT BAM Fastq m ()
bamToFastqC = mapC bamToFastq .| concatC
{-# INLINE bamToFastqC #-}

-- | Convert pairedend bam to fragment. 
bamToFragmentC :: Monad m => BAMHeader -> ConduitT BAM BED m ()
bamToFragmentC header = mapC (bamToFragment header) .| concatC
{-# INLINE bamToFragmentC #-}

bamToFragment :: BAMHeader -> BAM -> Maybe BED
bamToFragment header bam
    | not (isFirstSegment flg) = Nothing
    | otherwise = do
        chr1 <- refName header bam
        chr2 <- mateRefName header bam
        if chr1 == chr2
            then return $ BED chr1 (min start1 start2) (max end1 end2)
                (Just $ queryName bam) Nothing Nothing
            else mzero
  where
    start1 = startLoc bam
    end1 = endLoc bam
    start2 = mateStartLoc bam
    end2 = mateStartLoc bam + ciglen cig
    cig = case queryAuxData ('M', 'C') bam of
        Just (AuxString x) -> string2Cigar x
        _ -> error "No MC tag. Please run samtools fixmate on file first."
    ciglen (CIGAR c) = foldl' f 0 c
        where f acc (n,x) = if x `elem` "MDN=X" then n + acc else acc
    flg = flag bam
{-# INLINE bamToFragment #-}

-- | Convert pairedend bam file to bed. the bam file must be sorted by names,
-- e.g., using "samtools sort -n". This condition is checked from Bam header.
sortedBamToBedPE :: Monad m => BAMHeader -> ConduitT BAM (BED, BED) m ()
sortedBamToBedPE header = case getSortOrder header of
    Queryname -> loopBedPE .| concatC
    _         -> error "Bam file must be sorted by NAME."
  where
    loopBedPE = (,) <$$> await <***> await >>= \case
        Nothing -> return ()
        Just (bam1, bam2) -> if queryName bam1 /= queryName bam2
            then error "Adjacent records have different query names. Aborted."
            else do
                yield $ (,) <$> bamToBed header bam1 <*> bamToBed header bam2
                loopBedPE
      where
        (<$$>) = fmap . fmap
        (<***>) = (<*>) . fmap (<*>)
{-# INLINE sortedBamToBedPE #-}

-- | Convert BAM to BED.
bamToBed :: BAMHeader -> BAM -> Maybe BED
bamToBed header bam = mkBed <$> refName header bam
  where
    mkBed chr = BED chr start end nm sc str
    start = startLoc bam
    end = endLoc bam
    nm = Just $ queryName bam
    str = Just $ not $ isRev bam
    sc = Just $ fromIntegral $ mapq bam
{-# INLINE bamToBed #-}

-- | Convert BAM to Fastq.
bamToFastq :: BAM -> Maybe Fastq
bamToFastq bam = Fastq (queryName bam) <$> getSeq bam <*> qualityS bam
{-# INLINE bamToFastq #-}