{-# LINE 1 "src/Bio/HTS.chs" #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
module Bio.HTS
( getBamHeader
, getSortOrder
, streamBam
, sinkBam
, getChrId
, getChr
, startLoc
, endLoc
, readLen
, isRev
, flag
, mapq
, getSeq
, seqName
, qualityS
, quality
, cigar
, mateChr
, mateChrId
, mateStartLoc
, tLen
, auxData
, queryAuxData
, bamToSam
, appendAux
, hasMultiSegments
, isProperAligned
, isUnmapped
, isNextUnmapped
, isRC
, isNextRC
, isFirstSegment
, isLastSegment
, isSecondary
, isBadQual
, isDup
, isSupplementary
) where
import qualified Foreign.C.Types as C2HSImp
import qualified Foreign.Ptr as C2HSImp
import qualified Foreign.Storable as C2HSImp
import Conduit
import Data.Bits (testBit)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString as BS
import Data.Int
import Data.Word
import Foreign.C.String
import Foreign.ForeignPtr
import Foreign.Marshal.Array
import Foreign.Marshal.Alloc
import Foreign.Storable (poke, peekByteOff)
import Foreign.Ptr
import System.IO
import System.IO.Unsafe (unsafePerformIO)
import Bio.HTS.Internal
import Bio.HTS.Types
getBamHeader :: FilePath -> IO BAMHeader
getBamHeader input = withHTSFile input ReadMode $ \hts ->
getBgzf hts >>= bamHdrRead >>= newForeignPtr bamHdrDestroy >>=
return . BAMHeader
{-# INLINE getBamHeader #-}
foreign import ccall unsafe "&bam_hdr_destroy"
bamHdrDestroy :: FunPtr (Ptr BamHdr -> IO ())
getSortOrder :: BAMHeader -> SortOrder
getSortOrder header = case lookup "SO" fields of
Just "unknown" -> Unknown
Just "unsorted" -> Unsorted
Just "queryname" -> Queryname
Just "coordinate" -> Coordinate
_ -> Unknown
where
fields = map (f . B.split ':') $ tail $ B.split '\t' $ head $ B.lines $
showBamHeader header
f [a,b] = (a,b)
f _ = error "Auxiliary field parsing failed!"
streamBam :: MonadResource m => FilePath -> ConduitT i BAM m ()
streamBam input = bracketP (htsOpen input "r") htsClose $ \hts -> do
bgzf <- liftIO $ getBgzf hts
_ <- liftIO $ bamHdrRead bgzf >>= newForeignPtr bamHdrDestroy
loop bgzf
where
loop bgzf = do
ptr <- liftIO $ callocBytes 64
{-# LINE 101 "src/Bio/HTS.chs" #-}
code <- liftIO $ bamRead1 bgzf ptr
case () of
_ | code > 0 -> liftIO (newForeignPtr bamDestory1 ptr) >>=
yield . BAM >> loop bgzf
| code == -1 -> return ()
| code == -2 -> error "truncated file"
| otherwise -> error $ "read bam failed with code: " ++ show code
{-# INLINE streamBam #-}
foreign import ccall unsafe "&bam_destroy1"
bamDestory1 :: FunPtr (Ptr Bam1 -> IO ())
sinkBam :: MonadResource m => FilePath -> BAMHeader -> ConduitT BAM o m ()
sinkBam output header = bracketP (htsOpen output "wb") htsClose $ \hts -> do
bgzf <- liftIO $ getBgzf hts
liftIO (withForeignPtr (unbamHeader header) $ bamHdrWrite bgzf) >>= \case
0 -> mapM_C $ \bam -> liftIO $ do
code <- withForeignPtr (unbam bam) (bamWrite1 bgzf)
if code < 0
then error "bam_write1 failed"
else return ()
_ -> error "'bam_hdr_write' failed."
getChrId :: BAM -> Int
getChrId = unsafePerformIO . flip withForeignPtr fun . unbam
where
fun = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 0 :: IO C2HSImp.CInt})
{-# LINE 131 "src/Bio/HTS.chs" #-}
{-# INLINE getChrId #-}
getChr :: BAMHeader -> BAM -> Maybe B.ByteString
getChr header bam
| chr < 0 = Nothing
| otherwise = Just $ unsafePerformIO $
withForeignPtr (unbamHeader header) $ \h ->
bamChr h chr >>= B.packCString
where
chr = getChrId bam
{-# INLINE getChr #-}
startLoc :: BAM -> Int
startLoc = unsafePerformIO . flip withForeignPtr fun . unbam
where
fun = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 4 :: IO C2HSImp.CInt})
{-# LINE 149 "src/Bio/HTS.chs" #-}
{-# INLINE startLoc #-}
endLoc :: BAM -> Int
endLoc = unsafePerformIO . flip withForeignPtr bamEndpos .unbam
{-# INLINE endLoc #-}
readLen :: BAM -> Int
readLen = unsafePerformIO . flip withForeignPtr fun . unbam
where
fun = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 20 :: IO C2HSImp.CInt})
{-# LINE 163 "src/Bio/HTS.chs" #-}
{-# INLINE readLen #-}
isRev :: BAM -> Bool
isRev = unsafePerformIO . flip withForeignPtr bamIsRev . unbam
{-# INLINE isRev #-}
flag :: BAM -> Word16
flag = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 12 :: IO C2HSImp.CUShort})
{-# LINE 175 "src/Bio/HTS.chs" #-}
{-# INLINE flag #-}
mapq :: BAM -> Word8
mapq = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 10 :: IO C2HSImp.CUChar})
{-# LINE 184 "src/Bio/HTS.chs" #-}
{-# INLINE mapq #-}
getSeq :: BAM -> Maybe B.ByteString
getSeq = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn b = fromIntegral <$> (\ptr -> do {C2HSImp.peekByteOff ptr 20 :: IO C2HSImp.CInt}) b >>= \case
0 -> return Nothing
n -> allocaArray n $ \str -> do
bamGetSeq b str n
Just <$> B.packCStringLen (str, n)
{-# INLINE getSeq #-}
seqName :: BAM -> B.ByteString
seqName = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn b = (\ptr -> do {C2HSImp.peekByteOff ptr 48 :: IO (C2HSImp.Ptr C2HSImp.CUChar)}) b >>= B.packCString . castPtr
{-# INLINE seqName #-}
qualityS :: BAM -> Maybe B.ByteString
qualityS = fmap (BS.map (+33)) . quality
quality :: BAM -> Maybe B.ByteString
quality = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn b = fromIntegral <$> (\ptr -> do {C2HSImp.peekByteOff ptr 20 :: IO C2HSImp.CInt}) b >>= \case
0 -> return Nothing
n -> allocaArray n $ \str -> bamGetQual b str n >>= \case
0 -> Just <$> B.packCStringLen (str, n)
_ -> return Nothing
{-# INLINE quality #-}
cigar :: BAM -> Maybe [(Int, Char)]
cigar = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn b = fromIntegral <$> (\ptr -> do {C2HSImp.peekByteOff ptr 16 :: IO C2HSImp.CUInt}) b >>= \case
0 -> return Nothing
n -> allocaArray n $ \num -> allocaArray n $ \str -> do
bamGetCigar b num str n
num' <- peekArray (fromIntegral n) num
str' <- peekArray (fromIntegral n) str
return $ Just $ zip (map fromIntegral num') $
map castCCharToChar str'
{-# INLINE cigar #-}
mateChrId :: BAM -> Int
mateChrId = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 24 :: IO C2HSImp.CInt})
{-# LINE 236 "src/Bio/HTS.chs" #-}
{-# INLINE mateChrId #-}
mateChr :: BAMHeader -> BAM -> Maybe B.ByteString
mateChr header bam
| chr < 0 = Nothing
| otherwise = Just $ unsafePerformIO $
withForeignPtr (unbamHeader header) $ \h ->
bamChr h chr >>= B.packCString
where
chr = mateChrId bam
{-# INLINE mateChr #-}
mateStartLoc :: BAM -> Int
mateStartLoc = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 28 :: IO C2HSImp.CInt})
{-# LINE 253 "src/Bio/HTS.chs" #-}
{-# INLINE mateStartLoc #-}
tLen :: BAM -> Int
tLen = unsafePerformIO . flip withForeignPtr fn . unbam
where
fn = fmap fromIntegral . (\ptr -> do {C2HSImp.peekByteOff ptr 32 :: IO C2HSImp.CInt})
{-# LINE 259 "src/Bio/HTS.chs" #-}
{-# INLINE tLen #-}
auxData :: BAM -> [((Char, Char), AuxiliaryData)]
auxData bam = unsafePerformIO $ withForeignPtr (unbam bam) $ \b -> do
l <- bamGetLAux b
aux <- bamGetAux b
go aux l
where
go ptr i
| i <= 0 = return []
| otherwise = do
name <- (,) <$> peekByteOff ptr 0 <*> peekByteOff ptr 1
castCCharToChar <$> peekByteOff ptr 2 >>= \case
'A' -> do
r <- AuxChar . castCCharToChar <$> peekByteOff ptr 3
rs <- go (plusPtr ptr 4) $ i - 4
return $ (name, r) : rs
'c' -> do
r <- AuxInt . fromIntegral <$> (peekByteOff ptr 3 :: IO Int8)
rs <- go (plusPtr ptr 4) $ i - 4
return $ (name, r) : rs
'C' -> do
r <- AuxInt . fromIntegral <$> (peekByteOff ptr 3 :: IO Word8)
rs <- go (plusPtr ptr 4) $ i - 4
return $ (name, r) : rs
's' -> do
r <- AuxInt . fromIntegral <$> (peekByteOff ptr 3 :: IO Int16)
rs <- go (plusPtr ptr 5) $ i - 5
return $ (name, r) : rs
'S' -> do
r <- AuxInt . fromIntegral <$> (peekByteOff ptr 3 :: IO Word16)
rs <- go (plusPtr ptr 5) $ i - 5
return $ (name, r) : rs
'i' -> do
r <- AuxInt . fromIntegral <$> (peekByteOff ptr 3 :: IO Int32)
rs <- go (plusPtr ptr 7) $ i - 7
return $ (name, r) : rs
'I' -> do
r <- AuxInt . fromIntegral <$> (peekByteOff ptr 3 :: IO Word32)
rs <- go (plusPtr ptr 7) $ i - 7
return $ (name, r) : rs
'f' -> do
r <- AuxFloat <$> peekByteOff ptr 3
rs <- go (plusPtr ptr 7) $ i - 7
return $ (name, r) : rs
'Z' -> do
str <- B.packCString (plusPtr ptr 3)
let l = B.length str + 1 + 3
rs <- go (plusPtr ptr l) $ i - l
return $ (name, AuxString str) : rs
'H' -> do
str <- B.packCString (plusPtr ptr 3)
let l = B.length str + 1 + 3
rs <- go (plusPtr ptr l) $ i - l
return $ (name, AuxByteArray str) : rs
'B' -> do
n <- fromIntegral <$> (peekByteOff ptr 4 :: IO Int32)
castCCharToChar <$> peekByteOff ptr 3 >>= \case
'c' -> do
r <- AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 8 :: IO [Int8])
let l = 8 + n
rs <- go (plusPtr ptr l) $ i - l
return $ (name, r) : rs
'C' -> do
r <- AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 8 :: IO [Word8])
let l = 8 + n
rs <- go (plusPtr ptr l) $ i - l
return $ (name, r) : rs
's' -> do
r <- AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 8 :: IO [Int16])
let l = 8 + 2 * n
rs <- go (plusPtr ptr l) $ i - l
return $ (name, r) : rs
'S' -> do
r <- AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 8 :: IO [Word16])
let l = 8 + 2 * n
rs <- go (plusPtr ptr l) $ i - l
return $ (name, r) : rs
'i' -> do
r <- AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 8 :: IO [Int32])
let l = 8 + 4 * n
rs <- go (plusPtr ptr l) $ i - l
return $ (name, r) : rs
'I' -> do
r <- AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 8 :: IO [Word32])
let l = 8 + 4 * n
rs <- go (plusPtr ptr l) $ i - l
return $ (name, r) : rs
'f' -> do
r <- AuxFloatArray <$>
(peekArray n $ plusPtr ptr 8 :: IO [Float])
let l = 8 + 4 * n
rs <- go (plusPtr ptr l) $ i - l
return $ (name, r) : rs
x -> error $ "Unknown auxiliary record type: " ++ [x]
x -> error $ "Unknown auxiliary record type: " ++ [x]
{-# INLINE auxData #-}
queryAuxData :: (Char, Char) -> BAM -> Maybe AuxiliaryData
queryAuxData (x1,x2) bam = unsafePerformIO $
withForeignPtr (unbam bam) $ \b -> do
ptr <- bamAuxGet b [x1,x2]
if ptr == nullPtr
then return Nothing
else Just <$> getAuxData1 ptr
{-# INLINE queryAuxData #-}
getAuxData1 :: Ptr () -> IO AuxiliaryData
getAuxData1 ptr = castCCharToChar <$> peekByteOff ptr 0 >>= \case
'A' -> AuxChar . castCCharToChar <$> peekByteOff ptr 1
'c' -> AuxInt . fromIntegral <$> (peekByteOff ptr 1 :: IO Int8)
'C' -> AuxInt . fromIntegral <$> (peekByteOff ptr 1 :: IO Word8)
's' -> AuxInt . fromIntegral <$> (peekByteOff ptr 1 :: IO Int16)
'S' -> AuxInt . fromIntegral <$> (peekByteOff ptr 1 :: IO Word16)
'i' -> AuxInt . fromIntegral <$> (peekByteOff ptr 1 :: IO Int32)
'I' -> AuxInt . fromIntegral <$> (peekByteOff ptr 1 :: IO Word32)
'f' -> AuxFloat <$> peekByteOff ptr 1
'Z' -> AuxString <$> B.packCString (plusPtr ptr 1)
'H' -> AuxByteArray <$> B.packCString (plusPtr ptr 1)
'B' -> do
n <- fromIntegral <$> (peekByteOff ptr 2 :: IO Int32)
peekByteOff ptr 1 >>= \case
'c' -> AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 6 :: IO [Int8])
'C' -> AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 6 :: IO [Word8])
's' -> AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 6 :: IO [Int16])
'S' -> AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 6 :: IO [Word16])
'i' -> AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 6 :: IO [Int32])
'I' -> AuxIntArray . map fromIntegral <$>
(peekArray n $ plusPtr ptr 6 :: IO [Word32])
'f' -> AuxFloatArray <$>
(peekArray n $ plusPtr ptr 6 :: IO [Float])
x -> error $ "Unknown auxiliary record type: " ++ [x]
x -> error $ "Unknown auxiliary record type: " ++ [x]
{-# INLINE getAuxData1 #-}
bamToSam :: BAMHeader -> BAM -> SAM
bamToSam h b = SAM (seqName b) (flag b) (getChr h b) (startLoc b) (mapq b)
(cigar b) (mateChr h b) (mateStartLoc b) (tLen b) (getSeq b) (quality b)
(auxData b)
hasMultiSegments :: Word16 -> Bool
hasMultiSegments f = testBit f 0
isProperAligned :: Word16 -> Bool
isProperAligned f = testBit f 1
isUnmapped :: Word16 -> Bool
isUnmapped f = testBit f 2
isNextUnmapped :: Word16 -> Bool
isNextUnmapped f = testBit f 3
isRC :: Word16 -> Bool
isRC f = testBit f 4
isNextRC :: Word16 -> Bool
isNextRC f = testBit f 5
isFirstSegment :: Word16 -> Bool
isFirstSegment f = testBit f 6
isLastSegment :: Word16 -> Bool
isLastSegment f = testBit f 7
isSecondary :: Word16 -> Bool
isSecondary f = testBit f 8
isBadQual :: Word16 -> Bool
isBadQual f = testBit f 9
isDup :: Word16 -> Bool
isDup f = testBit f 10
isSupplementary :: Word16 -> Bool
isSupplementary f = testBit f 11
appendAux :: (Char, Char)
-> AuxiliaryData
-> BAM
-> IO ()
appendAux (x1,x2) aux bam = withForeignPtr (unbam bam) $ \b -> do
code <- case aux of
AuxChar d -> with d $ bamAuxAppend b [x1,x2] 'A' 1
AuxInt d -> with d $ bamAuxAppend b [x1,x2] 'i' 4
AuxFloat d -> with d $ bamAuxAppend b [x1,x2] 'f' 4
AuxString d -> B.useAsCString d $
bamAuxAppend b [x1,x2] 'Z' (B.length d + 1) . castPtr
_ -> error "Not implemented"
if code == 0 then return () else error "Append aux failed"
where
with x fun = alloca $ \ptr -> poke ptr x >> fun (castPtr ptr)
{-# INLINE appendAux #-}