{-# LANGUAGE FlexibleContexts #-} module Bio.Data.Bam ( Bam , HeaderState , withBamFile , readBam , writeBam , bamToBed , sortedBamToBedPE ) where import Bio.Data.Bed import Bio.HTS import Bio.HTS.Types (Bam, FileHeader (..)) import Conduit import Control.Lens ((&), (.~)) import Control.Monad.Reader (ask, lift) -- | Convert bam record to bed record. Unmapped reads will be discarded. bamToBed :: ConduitT Bam BED HeaderState () bamToBed = mapMC bamToBed1 .| concatC {-# INLINE bamToBed #-} -- | Convert pairedend bam file to bed. the bam file must be sorted by names, -- e.g., using "samtools sort -n". This condition is checked from Bam header. sortedBamToBedPE :: ConduitT Bam (BED, BED) HeaderState () sortedBamToBedPE = do maybeBam <- await case maybeBam of Nothing -> return () Just b' -> do leftover b' sortOrd <- getSortOrder <$> lift ask case sortOrd of Queryname -> loopBedPE .| concatC _ -> error "Bam file must be sorted by NAME." where loopBedPE = do pair <- (,) <$$> await <***> await case pair of Nothing -> return () Just (bam1, bam2) -> if qName bam1 /= qName bam2 then error "Adjacent records have different query names. Aborted." else do bed1 <- lift $ bamToBed1 bam1 bed2 <- lift $ bamToBed1 bam2 yield $ (,) <$> bed1 <*> bed2 loopBedPE where (<$$>) = fmap . fmap (<***>) = (<*>) . fmap (<*>) {-# INLINE sortedBamToBedPE #-} bamToBed1 :: Bam -> HeaderState (Maybe BED) bamToBed1 bam = do BamHeader hdr <- lift ask return $ (\chr -> asBed chr start end & name .~ nm & score .~ sc & strand .~ str) <$> getChr hdr bam where start = fromIntegral $ position bam end = fromIntegral $ endPos bam nm = Just $ qName bam str = Just $ not $ isRev bam sc = Just $ fromIntegral $ mapq bam {-# INLINE bamToBed1 #-}