{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE TemplateHaskell #-}
module Bio.HTS where
import Conduit
import Control.Monad
import Control.Monad.Reader
import Data.Bits (testBit)
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString as BS
import Data.Int
import Data.Monoid ((<>))
import Data.Word
import Foreign.C.String
import Foreign.C.Types
import Control.Exception (bracket)
import Foreign.ForeignPtr
import Foreign.Marshal.Array
import Foreign.Ptr
import Language.C.Inline (baseCtx, bsCtx, context, include,
withPtr)
import qualified Language.C.Inline.Unsafe as CU
import System.IO
import System.IO.Unsafe (unsafePerformIO)
import Bio.HTS.Types
context (baseCtx <> bsCtx <> htsCtx)
include "htslib/sam.h"
type HeaderState = ResourceT (ReaderT FileHeader IO)
withBamFile :: FilePath -> (BamFileHandle -> HeaderState a) -> IO a
withBamFile fl action = bracket (openBamFile fl ReadMode) closeBamFile $ \h -> do
hdr <- readBamHeader h
runReaderT (runResourceT $ action h) hdr
readBam :: BamFileHandle -> ConduitT () Bam HeaderState ()
readBam h = do
r <- liftIO $ bamRead1 h
case r of
Nothing -> return ()
Just b -> yield b >> readBam h
{-# INLINE readBam #-}
writeBam :: FilePath -> ConduitT Bam o HeaderState ()
writeBam fn = bracketP (openBamFile fn WriteMode) closeBamFile $ \(BamFileHandle fp) -> do
maybeBam <- await
case maybeBam of
Nothing -> return ()
Just b' -> do
leftover b'
header <- lift ask
case header of
BamHeader hdr -> do
err <- liftIO $ [CU.exp| int {
bam_hdr_write($(htsFile* fp)->fp.bgzf, $(bam_hdr_t* hdr)) } |]
if err /= 0
then error "'bam_hdr_write' failed."
else sink fp
_ -> error "No Bam header was found!"
where
sink fp = awaitForever $ \b' -> liftIO $ withForeignPtr b' $ \b ->
[CU.exp| int { bam_write1($(htsFile* fp)->fp.bgzf, $(bam1_t* b)) } |]
{-# INLINE writeBam #-}
openBamFile :: FilePath -> IOMode -> IO BamFileHandle
openBamFile fn mode = do
fn' <- newCString fn
h <- case mode of
ReadMode -> [CU.exp| htsFile* { hts_open($(char* fn'), "r") } |]
WriteMode -> [CU.exp| htsFile* { hts_open($(char* fn'), "wb") } |]
_ -> error ""
return $ BamFileHandle h
readBamHeader :: BamFileHandle -> IO FileHeader
readBamHeader (BamFileHandle h) =
BamHeader <$> [CU.exp| bam_hdr_t* { bam_hdr_read($(htsFile* h)->fp.bgzf) } |]
closeBamFile :: BamFileHandle -> IO ()
closeBamFile (BamFileHandle h) = [CU.exp| void { hts_close($(htsFile* h)) } |]
showBamHeader :: FileHeader -> B.ByteString
showBamHeader (BamHeader hdr) = unsafePerformIO $ do
ptr <- [CU.exp| char* { $(bam_hdr_t* hdr)->text } |]
l <- [CU.exp| uint32_t { $(bam_hdr_t* hdr)->l_text } |]
B.packCStringLen (ptr, fromIntegral l)
showBamHeader _ = error "No Bam Header was found."
data SortOrder = Unknown
| Unsorted
| Queryname
| Coordinate
getSortOrder :: FileHeader -> SortOrder
getSortOrder header = case lookup "SO" fields of
Just "unknown" -> Unknown
Just "unsorted" -> Unsorted
Just "queryname" -> Queryname
Just "coordinate" -> Coordinate
_ -> Unknown
where
fields = map (f . B.split ':') $ tail $ B.split '\t' $ head $ B.lines $
showBamHeader header
f [a,b] = (a,b)
f _ = error "Auxiliary field parsing failed!"
bamRead1 :: BamFileHandle -> IO (Maybe Bam)
bamRead1 (BamFileHandle h) = do
(r, b) <- withPtr $ \r -> [CU.block| bam1_t* {
bam1_t *b = bam_init1();
*$(int* r) = bam_read1($(htsFile* h)->fp.bgzf, b);
return b;
} |]
if r < 0 then return Nothing else Just <$> newForeignPtr bamDestory b
{-# INLINE bamRead1 #-}
foreign import ccall unsafe "&bam_destroy1"
bamDestory :: FunPtr (Ptr Bam' -> IO ())
getChrId :: Bam -> Int32
getChrId = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.tid } |]
{-# INLINE getChrId #-}
getChr :: Ptr BamHdr -> Bam -> Maybe B.ByteString
getChr h b' | i < 0 = Nothing
| otherwise = Just $ unsafePerformIO $ join $ B.packCString <$>
[CU.exp| char* { $(bam_hdr_t* h)->target_name[$(int32_t i)] } |]
where
i = getChrId b'
{-# INLINE getChr #-}
position :: Bam -> Int32
position = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.pos } |]
{-# INLINE position #-}
endPos :: Bam -> Int32
endPos = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { bam_endpos($(bam1_t* b)) } |]
{-# INLINE endPos #-}
queryLen :: Bam -> Int32
queryLen = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.l_qseq } |]
{-# INLINE queryLen #-}
isRev :: Bam -> Bool
isRev = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
r <- [CU.exp| int {bam_is_rev($(bam1_t* b)) } |]
return $ if r == 0 then False else True
{-# INLINE isRev #-}
flag :: Bam -> Word16
flag = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| uint16_t { $(bam1_t* b)->core.flag } |]
{-# INLINE flag #-}
mapq :: Bam -> Word8
mapq = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| uint8_t { $(bam1_t* b)->core.qual } |]
{-# INLINE mapq #-}
getSeq :: Bam -> Maybe B.ByteString
getSeq = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
l <- [CU.exp| int32_t { $(bam1_t* b)->core.l_qseq } |]
if (l == 0)
then return Nothing
else allocaArray (fromIntegral l) $ \str -> do
[CU.block| void {
int32_t i;
uint8_t *s = bam_get_seq($(bam1_t* b));
for (i = 0; i < $(int32_t l); ++i)
$(char* str)[i] = "=ACMGRSVTWYHKDBN"[bam_seqi(s, i)];
} |]
Just <$> B.packCStringLen (str, fromIntegral l)
{-# INLINE getSeq #-}
qName :: Bam -> B.ByteString
qName = unsafePerformIO . flip withForeignPtr fn
where
fn b = join $ B.packCString <$>
[CU.exp| char* {bam_get_qname($(bam1_t* b)) } |]
{-# INLINE qName #-}
qualityS :: Bam -> Maybe B.ByteString
qualityS = fmap (BS.map (+33)) . quality
quality :: Bam -> Maybe B.ByteString
quality = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
l <- [CU.exp| int32_t { $(bam1_t* b)->core.l_qseq } |]
if l == 0
then return Nothing
else do
x <- [CU.block| int8_t {
uint8_t *s = bam_get_qual($(bam1_t* b));
return (s[0] == 0xff);
} |]
if x /= 0
then return Nothing
else allocaArray (fromIntegral l) $ \str -> do
[CU.block| void {
int32_t i;
uint8_t *s = bam_get_qual($(bam1_t* b));
for (i = 0; i < $(int32_t l); ++i)
$(char* str)[i] = s[i];
} |]
Just <$> B.packCStringLen (str, fromIntegral l)
{-# INLINE quality #-}
cigar :: Bam -> Maybe [(Int, Char)]
cigar = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
n <- [CU.exp| uint16_t { $(bam1_t* b)->core.n_cigar } |]
if (n == 0)
then return Nothing
else allocaArray (fromIntegral n) $ \num ->
allocaArray (fromIntegral n) $ \str -> do
[CU.block| void {
uint16_t i;
uint32_t *cigar = bam_get_cigar($(bam1_t* b));
for (i = 0; i < $(uint16_t n); ++i) {
$(int* num)[i] = bam_cigar_oplen(cigar[i]);
$(char* str)[i] = bam_cigar_opchr(cigar[i]);
}
} |]
num' <- peekArray (fromIntegral n) num
str' <- peekArray (fromIntegral n) str
return $ Just $
zip (map fromIntegral num') $ map castCCharToChar str'
{-# INLINE cigar #-}
mateChrId :: Bam -> Int32
mateChrId = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.mtid } |]
{-# INLINE mateChrId #-}
mateChr :: Ptr BamHdr -> Bam -> Maybe B.ByteString
mateChr h b' | i < 0 = Nothing
| otherwise = Just $ unsafePerformIO $ join $ B.packCString <$>
[CU.exp| char* { $(bam_hdr_t* h)->target_name[$(int32_t i)] } |]
where
i = mateChrId b'
{-# INLINE mateChr #-}
matePos :: Bam -> Int32
matePos = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.mpos } |]
{-# INLINE matePos #-}
tLen :: Bam -> Int32
tLen = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.isize } |]
{-# INLINE tLen #-}
bamToSam :: Ptr BamHdr -> Bam -> Sam
bamToSam h b = Sam (qName b) (flag b) (getChr h b) (position b) (mapq b)
(cigar b) (mateChr h b) (matePos b) (tLen b) (getSeq b) (quality b)
hasMultiSegments :: Word16 -> Bool
hasMultiSegments f = testBit f 0
isProperAligned :: Word16 -> Bool
isProperAligned f = testBit f 1
isUnmapped :: Word16 -> Bool
isUnmapped f = testBit f 2
isNextUnmapped :: Word16 -> Bool
isNextUnmapped f = testBit f 3
isRC :: Word16 -> Bool
isRC f = testBit f 4
isNextRC :: Word16 -> Bool
isNextRC f = testBit f 5
isFirstSegment :: Word16 -> Bool
isFirstSegment f = testBit f 6
isLastSegment :: Word16 -> Bool
isLastSegment f = testBit f 7
isSecondary :: Word16 -> Bool
isSecondary f = testBit f 8
isBadQual :: Word16 -> Bool
isBadQual f = testBit f 9
isDup :: Word16 -> Bool
isDup f = testBit f 10
isSupplementary :: Word16 -> Bool
isSupplementary f = testBit f 11