module Bio.SamTools.Bam (
HeaderSeq(..)
, Header, nTargets, targetSeqList, targetSeq, targetSeqName, targetSeqLen, lookupTarget
, Bam1, header
, targetID, targetName, targetLen, position
, isPaired, isProperPair, isUnmap, isMateUnmap, isReverse, isMateReverse
, isRead1, isRead2, isSecondary, isQCFail, isDup, isSupplementary
, cigars, queryName, queryLength, querySeq, queryQual
, mateTargetID, mateTargetName, mateTargetLen, matePosition, insertSize
, nMismatch, nHits, matchDesc, auxGeti, auxGetf, auxGetd, auxGetA, auxGetZ, auxGet
, addAuxA, addAuxi, addAuxZ
, refSpLoc, refSeqLoc
, InHandle, inHeader
, openTamInFile, openTamInFileWithIndex, openBamInFile
, closeInHandle
, withTamInFile, withTamInFileWithIndex, withBamInFile
, get1
, readBams
, OutHandle, outHeader
, openTamOutFile, openBamOutFile
, closeOutHandle
, withTamOutFile, withBamOutFile
, put1
)
where
import Control.Concurrent.MVar
import Control.Exception
import Control.Monad
import Data.Bits
import qualified Data.ByteString as BSW
import qualified Data.ByteString.Char8 as BS
import Foreign hiding (new)
import Foreign.C.Types
import Foreign.C.String
import qualified System.IO.Unsafe as Unsafe
import qualified Data.Vector as V
import Bio.SeqLoc.OnSeq
import qualified Bio.SeqLoc.SpliceLocation as SpLoc
import Bio.SeqLoc.Strand
import Bio.SamTools.Cigar
import Bio.SamTools.Internal
import Bio.SamTools.LowLevel
targetID :: Bam1 -> Maybe Int
targetID b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ liftM fromTID . getTID
where fromTID ctid | ctid < 0 = Nothing
| otherwise = Just $! fromIntegral ctid
targetName :: Bam1 -> Maybe BS.ByteString
targetName b = liftM (targetSeqName (header b)) $! targetID b
targetLen :: Bam1 -> Maybe Int64
targetLen b = liftM (targetSeqLen (header b)) $! targetID b
position :: Bam1 -> Maybe Int64
position b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ liftM fromPos . getPos
where fromPos cpos | cpos < 0 = Nothing
| otherwise = Just $! fromIntegral cpos
isFlagSet :: BamFlag -> Bam1 -> Bool
isFlagSet f b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ liftM isfset . getFlag
where isfset = (== f) . (.&. f)
isPaired :: Bam1 -> Bool
isPaired = isFlagSet flagPaired
isProperPair :: Bam1 -> Bool
isProperPair = isFlagSet flagProperPair
isUnmap :: Bam1 -> Bool
isUnmap = isFlagSet flagUnmap
isMateUnmap :: Bam1 -> Bool
isMateUnmap = isFlagSet flagMUnmap
isReverse :: Bam1 -> Bool
isReverse = isFlagSet flagReverse
isMateReverse :: Bam1 -> Bool
isMateReverse = isFlagSet flagMReverse
isRead1 :: Bam1 -> Bool
isRead1 = isFlagSet flagRead1
isRead2 :: Bam1 -> Bool
isRead2 = isFlagSet flagRead2
isSecondary :: Bam1 -> Bool
isSecondary = isFlagSet flagSecondary
isQCFail :: Bam1 -> Bool
isQCFail = isFlagSet flagQCFail
isDup :: Bam1 -> Bool
isDup = isFlagSet flagDup
isSupplementary :: Bam1 -> Bool
isSupplementary = isFlagSet flagSupplementary
cigars :: Bam1 -> [Cigar]
cigars b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p -> do
nc <- getNCigar p
liftM (map toCigar) $! peekArray nc . bam1Cigar $ p
queryName :: Bam1 -> BS.ByteString
queryName b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) (return . bam1QName)
queryLength :: Bam1 -> Maybe Int64
queryLength b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ liftM fromc . getLQSeq
where fromc clq | clq < 1 = Nothing
| otherwise = Just $! fromIntegral clq
querySeq :: Bam1 -> Maybe BS.ByteString
querySeq b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p ->
let seqarr = bam1Seq p
getQSeq l | l < 1 = return Nothing
| otherwise = return $! Just $!
fst (BS.unfoldrN (fromIntegral l) (\i -> if i==l then Nothing else Just (seqiToChar $ bam1Seqi seqarr $ i,i+1)) 0)
in getLQSeq p >>= getQSeq
queryQual :: Bam1 -> Maybe BS.ByteString
queryQual b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p ->
let getQQual l | l < 1 = return Nothing
| otherwise = do q0 <- peek $! bam1Qual p
if q0 == 0xff
then return Nothing
else liftM (Just . BSW.map (+ 33)) $!
BS.packCStringLen (castPtr $! bam1Qual p, fromIntegral l)
in getLQSeq p >>= getQQual
seqiToChar :: CUChar -> Char
seqiToChar = (chars V.!) . fromIntegral
where chars = emptyChars V.// [(1, 'A'), (2, 'C'), (4, 'G'), (8, 'T'), (15, 'N')]
emptyChars = V.generate 16 (\idx -> error $ "Unknown char " ++ show idx)
mateTargetID :: Bam1 -> Maybe Int
mateTargetID b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ liftM fromTID . getMTID
where fromTID ctid | ctid < 0 = Nothing
| otherwise = Just $! fromIntegral ctid
mateTargetName :: Bam1 -> Maybe BS.ByteString
mateTargetName b = liftM (targetSeqName (header b)) $! mateTargetID b
mateTargetLen :: Bam1 -> Maybe Int64
mateTargetLen b = liftM (targetSeqLen (header b)) $! mateTargetID b
matePosition :: Bam1 -> Maybe Int64
matePosition b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ liftM fromPos . getMPos
where fromPos cpos | cpos < 0 = Nothing
| otherwise = Just $! fromIntegral cpos
insertSize :: Bam1 -> Maybe Int64
insertSize b = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ liftM fromISize . getISize
where fromISize cis | cis < 1 = Nothing
| otherwise = Just $! fromIntegral cis
auxGeti :: Bam1 -> String -> Maybe Int
auxGeti b str = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p ->
withCAString str $ \valstr ->
do val <- bamAuxGet p valstr
if val == nullPtr
then return Nothing
else liftM Just $! liftM fromIntegral $! bamAux2i val
auxGetf :: Bam1 -> String -> Maybe Float
auxGetf b str = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p ->
withCAString str $ \valstr ->
do val <- bamAuxGet p valstr
if val == nullPtr
then return Nothing
else liftM Just $! liftM realToFrac $! bamAux2f val
auxGetd :: Bam1 -> String -> Maybe Double
auxGetd b str = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p ->
withCAString str $ \valstr ->
do val <- bamAuxGet p valstr
if val == nullPtr
then return Nothing
else liftM Just $! liftM realToFrac $! bamAux2d val
auxGetA :: Bam1 -> String -> Maybe Char
auxGetA b str = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p ->
withCAString str $ \valstr ->
do val <- bamAuxGet p valstr
if val == nullPtr
then return Nothing
else liftM Just $! liftM castCCharToChar $! bamAux2A val
auxGetZ :: Bam1 -> String -> Maybe BS.ByteString
auxGetZ b str = Unsafe.unsafePerformIO $ withForeignPtr (ptrBam1 b) $ \p ->
withCAString str $ \valstr ->
do val <- bamAuxGet p valstr
if val == nullPtr
then return Nothing
else do cstr <- bamAux2Z val
if cstr == nullPtr
then return Nothing
else liftM Just . BS.packCString $ cstr
class AuxGet a where
auxGet :: Bam1 -> String -> Maybe a
instance AuxGet Int where
auxGet b str = auxGeti b str
instance AuxGet Float where
auxGet b str = auxGetf b str
instance AuxGet Double where
auxGet b str = auxGetd b str
instance AuxGet Char where
auxGet b str = auxGetA b str
instance AuxGet BS.ByteString where
auxGet b str = auxGetZ b str
addAuxA :: Bam1 -> String -> Char -> IO Bam1
addAuxA b0 tag ch
= alloca $ \(valptr :: (Ptr CChar)) -> do
poke valptr (CChar . fromIntegral . fromEnum $ ch)
addAux b0 tag typecchar vallen (castPtr valptr)
where typecchar = (CChar . fromIntegral . fromEnum $ 'A')
vallen = fromIntegral $ sizeOf (undefined :: CChar)
addAuxi :: Bam1 -> String -> Int -> IO Bam1
addAuxi b0 tag i
= alloca $ \(valptr :: (Ptr CInt)) -> do
poke valptr (CInt . fromIntegral $ i)
addAux b0 tag typecchar vallen (castPtr valptr)
where typecchar = (CChar . fromIntegral . fromEnum $ 'i')
vallen = fromIntegral $ sizeOf (undefined :: CInt)
addAuxZ :: Bam1 -> String -> String -> IO Bam1
addAuxZ b0 tag str
= withCAString str $ \cstr -> do
strlen <- lengthArray0 (0 :: CChar) cstr
addAux b0 tag typecchar (CInt . fromIntegral $ strlen + 1) (castPtr cstr)
where typecchar = (CChar . fromIntegral . fromEnum $ 'Z')
addAux :: Bam1 -> String -> CChar -> CInt -> Ptr CUChar -> IO Bam1
addAux b0 tag typecchar vallen valptr
| Prelude.length tag == 2 = withForeignPtr (ptrBam1 b0) $ \pbi0 ->
do pb' <- bamDup1 pbi0 >>= newForeignPtr bamDestroy1Ptr
withForeignPtr pb' $ \pbi' ->
withCAStringLen tag $ \(tagstr, _tagstrlen) ->
bamAuxAppend pbi' tagstr typecchar vallen valptr
return $ Bam1 { ptrBam1 = pb', header = header b0 }
| otherwise = ioError . userError $ "Bad BAM Aux tag " ++ show tag
matchDesc :: Bam1 -> Maybe BS.ByteString
matchDesc b = auxGetZ b "MD"
nHits :: Bam1 -> Maybe Int
nHits b = auxGeti b "NH"
nMismatch :: Bam1 -> Maybe Int
nMismatch b = auxGeti b "NM"
refSpLoc :: Bam1 -> Maybe SpLoc.SpliceLoc
refSpLoc b | isUnmap b = Nothing
| otherwise = liftM (stranded strand) $! liftM2 (cigarToSpLoc) (position b) (Just . cigars $ b)
where strand = if isReverse b then Minus else Plus
refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc
refSeqLoc b = liftM2 OnSeq (liftM toSeqLabel $! targetName b) (refSpLoc b)
data InHandle = InHandle { inFilename :: !FilePath
, samfile :: !(MVar (Ptr SamFileInt))
, inHeader :: !Header
}
newInHandle :: FilePath -> Ptr SamFileInt -> IO InHandle
newInHandle filename fsam = do
when (fsam == nullPtr) $ ioError . userError $ "Error opening BAM file " ++ show filename
mv <- newMVar fsam
addMVarFinalizer mv (finalizeSamFile mv)
bhdr <- getSbamHeader fsam
when (bhdr == nullPtr) $ ioError . userError $
"Error reading header from BAM file " ++ show filename
hdr <- newHeader bhdr
return $ InHandle { inFilename = filename, samfile = mv, inHeader = hdr }
openTamInFile :: FilePath -> IO InHandle
openTamInFile filename = sbamOpen filename "r" nullPtr >>= newInHandle filename
openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle
openTamInFileWithIndex filename indexname
= withCString indexname (sbamOpen filename "r" . castPtr) >>= newInHandle filename
openBamInFile :: FilePath -> IO InHandle
openBamInFile filename = sbamOpen filename "rb" nullPtr >>= newInHandle filename
finalizeSamFile :: MVar (Ptr SamFileInt) -> IO ()
finalizeSamFile mv = modifyMVar mv $ \fsam -> do
unless (fsam == nullPtr) $ sbamClose fsam
return (nullPtr, ())
closeInHandle :: InHandle -> IO ()
closeInHandle = finalizeSamFile . samfile
withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a
withTamInFile filename = bracket (openTamInFile filename) closeInHandle
withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a
withTamInFileWithIndex filename indexname = bracket (openTamInFileWithIndex filename indexname) closeInHandle
withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a
withBamInFile filename = bracket (openBamInFile filename) closeInHandle
get1 :: InHandle -> IO (Maybe Bam1)
get1 inh = withMVar (samfile inh) $ \fsam -> do
b <- bamInit1
res <- sbamRead fsam b
if res < 0
then do bamDestroy1 b
if res < 1
then ioError . userError $ "Error reading from BAM file " ++ show (inFilename inh)
else return Nothing
else do bptr <- newForeignPtr bamDestroy1Ptr b
return . Just $ Bam1 { ptrBam1 = bptr, header = inHeader inh }
readBams :: FilePath -> IO [Bam1]
readBams = openBamInFile >=> getBams
where
getBams h = do
b <- get1 h
case b of Nothing -> closeInHandle h >> return []
Just b1 -> do
bs <- Unsafe.unsafeInterleaveIO (getBams h)
return (b1:bs)
data OutHandle = OutHandle { outFilename :: !FilePath
, outfile :: !(MVar (Ptr SamFileInt))
, outHeader :: !Header
}
newOutHandle :: String -> FilePath -> Header -> IO OutHandle
newOutHandle mode filename hdr = do
fsam <- withForeignPtr (unHeader hdr) $ sbamOpen filename mode . castPtr
when (fsam == nullPtr) $ ioError . userError $ "Error opening BAM file " ++ show filename
mv <- newMVar fsam
addMVarFinalizer mv (finalizeSamFile mv)
return $ OutHandle { outFilename = filename, outfile = mv, outHeader = hdr }
openTamOutFile :: FilePath -> Header -> IO OutHandle
openTamOutFile = newOutHandle "wh"
openBamOutFile :: FilePath -> Header -> IO OutHandle
openBamOutFile = newOutHandle "wb"
closeOutHandle :: OutHandle -> IO ()
closeOutHandle = finalizeSamFile . outfile
withTamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
withTamOutFile filename hdr = bracket (openTamOutFile filename hdr) closeOutHandle
withBamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
withBamOutFile filename hdr = bracket (openBamOutFile filename hdr) closeOutHandle
put1 :: OutHandle -> Bam1 -> IO ()
put1 outh b = withMVar (outfile outh) $ \fsam ->
withForeignPtr (ptrBam1 b) $ \p ->
sbamWrite fsam p >>= handleRes
where handleRes res | res > 0 = return ()
| otherwise = ioError . userError $ "Error writing to BAM file " ++ show (outFilename outh)