module Bio.HTS where
import Conduit
import Control.Monad
import Control.Monad.Reader
import Data.Bits (testBit)
import qualified Data.ByteString.Char8 as B
import Data.Int
import Data.Monoid ((<>))
import Data.Word
import Foreign.C.String
import Foreign.C.Types
import Control.Exception (bracket)
import Foreign.ForeignPtr
import Foreign.Marshal.Array
import Foreign.Ptr
import Language.C.Inline (baseCtx, bsCtx, context, include,
withPtr)
import qualified Language.C.Inline.Unsafe as CU
import System.IO
import System.IO.Unsafe (unsafePerformIO)
import Bio.HTS.Types
context (baseCtx <> bsCtx <> htsCtx)
include "htslib/sam.h"
type HeaderState = ResourceT (ReaderT FileHeader IO)
withBamFile :: FilePath -> (BamFileHandle -> HeaderState a) -> IO a
withBamFile fl action = bracket (openBamFile fl ReadMode) closeBamFile $ \h -> do
hdr <- readBamHeader h
runReaderT (runResourceT $ action h) hdr
readBam :: BamFileHandle -> ConduitT () Bam HeaderState ()
readBam h = do
r <- liftIO $ bamRead1 h
case r of
Nothing -> return ()
Just b -> yield b >> readBam h
writeBam :: FilePath -> ConduitT Bam o HeaderState ()
writeBam fn = bracketP (openBamFile fn WriteMode) closeBamFile $ \(BamFileHandle fp) -> do
maybeBam <- await
case maybeBam of
Nothing -> return ()
Just b' -> do
leftover b'
header <- lift ask
case header of
BamHeader hdr -> do
err <- liftIO $ [CU.exp| int {
bam_hdr_write($(htsFile* fp)->fp.bgzf, $(bam_hdr_t* hdr)) } |]
if err /= 0
then error "'bam_hdr_write' failed."
else sink fp
_ -> error "No Bam header was found!"
where
sink fp = awaitForever $ \b' -> liftIO $ withForeignPtr b' $ \b ->
[CU.exp| int { bam_write1($(htsFile* fp)->fp.bgzf, $(bam1_t* b)) } |]
openBamFile :: FilePath -> IOMode -> IO BamFileHandle
openBamFile fn mode = do
fn' <- newCString fn
h <- case mode of
ReadMode -> [CU.exp| htsFile* { hts_open($(char* fn'), "r") } |]
WriteMode -> [CU.exp| htsFile* { hts_open($(char* fn'), "wb") } |]
_ -> error ""
return $ BamFileHandle h
readBamHeader :: BamFileHandle -> IO FileHeader
readBamHeader (BamFileHandle h) =
BamHeader <$> [CU.exp| bam_hdr_t* { bam_hdr_read($(htsFile* h)->fp.bgzf) } |]
closeBamFile :: BamFileHandle -> IO ()
closeBamFile (BamFileHandle h) = [CU.exp| void { hts_close($(htsFile* h)) } |]
showBamHeader :: FileHeader -> B.ByteString
showBamHeader (BamHeader hdr) = unsafePerformIO $ do
ptr <- [CU.exp| char* { $(bam_hdr_t* hdr)->text } |]
l <- [CU.exp| uint32_t { $(bam_hdr_t* hdr)->l_text } |]
B.packCStringLen (ptr, fromIntegral l)
showBamHeader _ = error "No Bam Header was found."
data SortOrder = Unknown
| Unsorted
| Queryname
| Coordinate
getSortOrder :: FileHeader -> SortOrder
getSortOrder header = case lookup "SO" fields of
Just "unknown" -> Unknown
Just "unsorted" -> Unsorted
Just "queryname" -> Queryname
Just "coordinate" -> Coordinate
_ -> Unknown
where
fields = map (f . B.split ':') $ tail $ B.split '\t' $ head $ B.lines $
showBamHeader header
f [a,b] = (a,b)
f _ = error "Auxiliary field parsing failed!"
bamRead1 :: BamFileHandle -> IO (Maybe Bam)
bamRead1 (BamFileHandle h) = do
(r, b) <- withPtr $ \r -> [CU.block| bam1_t* {
bam1_t *b = bam_init1();
*$(int* r) = bam_read1($(htsFile* h)->fp.bgzf, b);
return b;
} |]
if r < 0 then return Nothing else Just <$> newForeignPtr bamDestory b
foreign import ccall unsafe "&bam_destroy1"
bamDestory :: FunPtr (Ptr Bam' -> IO ())
getChrId :: Bam -> Int32
getChrId = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.tid } |]
getChr :: Ptr BamHdr -> Bam -> Maybe B.ByteString
getChr h b' | i < 0 = Nothing
| otherwise = Just $ unsafePerformIO $ join $ B.packCString <$>
[CU.exp| char* { $(bam_hdr_t* h)->target_name[$(int32_t i)] } |]
where
i = getChrId b'
position :: Bam -> Int32
position = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.pos } |]
endPos :: Bam -> Int32
endPos = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { bam_endpos($(bam1_t* b)) } |]
queryLen :: Bam -> Int32
queryLen = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.l_qseq } |]
isRev :: Bam -> Bool
isRev = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
r <- [CU.exp| int {bam_is_rev($(bam1_t* b)) } |]
return $ if r == 0 then False else True
flag :: Bam -> Word16
flag = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| uint16_t { $(bam1_t* b)->core.flag } |]
mapq :: Bam -> Word8
mapq = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| uint8_t { $(bam1_t* b)->core.qual } |]
getSeq :: Bam -> Maybe B.ByteString
getSeq = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
l <- [CU.exp| int32_t { $(bam1_t* b)->core.l_qseq } |]
if (l == 0)
then return Nothing
else allocaArray (fromIntegral l) $ \str -> do
[CU.block| void {
int32_t i;
uint8_t *s = bam_get_seq($(bam1_t* b));
for (i = 0; i < $(int32_t l); ++i)
$(char* str)[i] = "=ACMGRSVTWYHKDBN"[bam_seqi(s, i)];
} |]
Just <$> B.packCStringLen (str, fromIntegral l)
qName :: Bam -> B.ByteString
qName = unsafePerformIO . flip withForeignPtr fn
where
fn b = join $ B.packCString <$>
[CU.exp| char* {bam_get_qname($(bam1_t* b)) } |]
quality :: Bam -> Maybe B.ByteString
quality = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
l <- [CU.exp| int32_t { $(bam1_t* b)->core.l_qseq } |]
if l == 0
then return Nothing
else do
x <- [CU.block| int8_t {
uint8_t *s = bam_get_qual($(bam1_t* b));
return (s[0] == 0xff);
} |]
if x /= 0
then return Nothing
else allocaArray (fromIntegral l) $ \str -> do
[CU.block| void {
int32_t i;
uint8_t *s = bam_get_qual($(bam1_t* b));
for (i = 0; i < $(int32_t l); ++i)
$(char* str)[i] = s[i];
} |]
Just <$> B.packCStringLen (str, fromIntegral l)
cigar :: Bam -> Maybe [(Int, Char)]
cigar = unsafePerformIO . flip withForeignPtr fn
where
fn b = do
n <- [CU.exp| uint16_t { $(bam1_t* b)->core.n_cigar } |]
if (n == 0)
then return Nothing
else allocaArray (fromIntegral n) $ \num ->
allocaArray (fromIntegral n) $ \str -> do
[CU.block| void {
uint16_t i;
uint32_t *cigar = bam_get_cigar($(bam1_t* b));
for (i = 0; i < $(uint16_t n); ++i) {
$(int* num)[i] = bam_cigar_oplen(cigar[i]);
$(char* str)[i] = bam_cigar_opchr(cigar[i]);
}
} |]
num' <- peekArray (fromIntegral n) num
str' <- peekArray (fromIntegral n) str
return $ Just $
zip (map fromIntegral num') $ map castCCharToChar str'
mateChrId :: Bam -> Int32
mateChrId = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.mtid } |]
mateChr :: Ptr BamHdr -> Bam -> Maybe B.ByteString
mateChr h b' | i < 0 = Nothing
| otherwise = Just $ unsafePerformIO $ join $ B.packCString <$>
[CU.exp| char* { $(bam_hdr_t* h)->target_name[$(int32_t i)] } |]
where
i = mateChrId b'
matePos :: Bam -> Int32
matePos = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.mpos } |]
tLen :: Bam -> Int32
tLen = unsafePerformIO . flip withForeignPtr fn
where
fn b = [CU.exp| int32_t { $(bam1_t* b)->core.isize } |]
bamToSam :: Ptr BamHdr -> Bam -> Sam
bamToSam h b = Sam (qName b) (flag b) (getChr h b) (position b) (mapq b)
(cigar b) (mateChr h b) (matePos b) (tLen b) (getSeq b) (quality b)
hasMultiSegments :: Word16 -> Bool
hasMultiSegments f = testBit f 0
isProperAligned :: Word16 -> Bool
isProperAligned f = testBit f 1
isUnmapped :: Word16 -> Bool
isUnmapped f = testBit f 2
isNextUnmapped :: Word16 -> Bool
isNextUnmapped f = testBit f 3
isRC :: Word16 -> Bool
isRC f = testBit f 4
isNextRC :: Word16 -> Bool
isNextRC f = testBit f 5
isFirstSegment :: Word16 -> Bool
isFirstSegment f = testBit f 6
isLastSegment :: Word16 -> Bool
isLastSegment f = testBit f 7
isSecondary :: Word16 -> Bool
isSecondary f = testBit f 8
isBadQual :: Word16 -> Bool
isBadQual f = testBit f 9
isDup :: Word16 -> Bool
isDup f = testBit f 10
isSupplementary :: Word16 -> Bool
isSupplementary f = testBit f 11