{-# LANGUAGE OverloadedStrings #-} module Main where import Control.Exception import Control.Monad import qualified Data.ByteString.Char8 as BS import Data.List import Data.Maybe import System.Environment import System.Exit import System.FilePath import System.IO import System.Process import Bio.SeqLoc.LocRepr import qualified Bio.SamTools.Bam as Bam import qualified Bio.SamTools.BamIndex as BamIndex import qualified Bio.SamTools.Cigar as Cigar geneTargetName = "chr1" geneTargetBounds = (14362 - 100, 16764 + 100) geneRegion = BS.concat [ geneTargetName, ":" , BS.pack . show . fst $ geneTargetBounds, "-" , BS.pack . show . snd $ geneTargetBounds ] main :: IO () main = getArgs >>= mainWithArgs where mainWithArgs [ bamin ] = doSamTest bamin mainWithArgs _ = do prog <- getProgName error . unwords $ [ "USAGE:", prog, "" ] doSamTest :: FilePath -> IO () doSamTest bamin = do samout <- bamToSam bamin let samout' = samout ++ "_samtools" rawSystemE "samtools" [ "view", bamin, "-h", "-o", samout' ] rawSystemP "diff" [ samout, samout' ] samout2 <- samToBam samout >>= bamToSam rawSystemP "diff" [ samout, samout2 ] genesam <- extract bamin let genesam' = genesam ++ "_samtools" rawSystemE "samtools" [ "view", bamin, "-h", "-o", genesam', BS.unpack geneRegion ] rawSystemP "diff" [ genesam, genesam' ] header <- headerToIndex bamin let header' = header ++ "_samtools" rawSystemE "samtools" [ "view", bamin, "-H", "-o", header' ] rawSystemP "diff" [ header, header' ] bracket (Bam.openBamInFile bamin) Bam.closeInHandle $ \hin -> Bam.get1 hin >>= maybe (return ()) (parseBam (Bam.inHeader hin)) addAuxFields bamin >>= hPutStrLn stderr addAuxFields :: FilePath -> IO FilePath addAuxFields inname = let outname = dropExtension inname ++ "-auxa.bam" in bracket (Bam.openBamInFile inname) Bam.closeInHandle $ \hin -> bracket (Bam.openBamOutFile outname (Bam.inHeader hin)) Bam.closeOutHandle $ \hout -> do myloop hin hout 0 return outname where myloop hin hout idx = Bam.get1 hin >>= maybe (return ()) (\b -> go b >> myloop hin hout (idx + 1)) where go b = Bam.addAuxA b "XX" 'm' >>= (\b' -> Bam.addAuxi b' "YZ" idx) >>= (\b'' -> Bam.addAuxZ b'' "aa" (digitName $ idx `mod` 10)) >>= Bam.put1 hout digitName i | i < 0 || i > 9 = show i | otherwise = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] !! i bamToSam :: FilePath -> IO FilePath bamToSam inname = let outname = dropExtension inname ++ "-test.sam" in bracket (Bam.openBamInFile inname) Bam.closeInHandle $ \hin -> bracket (Bam.openTamOutFile outname (Bam.inHeader hin)) Bam.closeOutHandle $ \hout -> do loop (Bam.get1 hin) (Bam.put1 hout) return outname samToBam :: FilePath -> IO FilePath samToBam inname = let outname = dropExtension inname ++ "-test.bam" in bracket (Bam.openTamInFile inname) Bam.closeInHandle $ \hin -> bracket (Bam.openBamOutFile outname (Bam.inHeader hin)) Bam.closeOutHandle $ \hout -> do loop (Bam.get1 hin) (Bam.put1 hout) return outname headerToIndex :: FilePath -> IO FilePath headerToIndex inname = let outname = dropExtension inname ++ "-index.txt" in bracket (Bam.openBamInFile inname) Bam.closeInHandle $ \hin -> withFile outname WriteMode $ \hout -> let hseqs = Bam.targetSeqList $ Bam.inHeader hin in do forM_ hseqs $ \hseq -> hPutStrLn hout . concat $ [ "@SQ\tSN:", BS.unpack . Bam.name $ hseq, "\tLN:", show . Bam.len $ hseq ] return outname extract :: FilePath -> IO FilePath extract inname = let outname = dropExtension inname ++ "-gene.sam" outname2 = dropExtension inname ++ "-gene-sploc.sam" in bracket (BamIndex.open inname) (BamIndex.close) $ \idxin -> let header = BamIndex.idxHeader idxin tid = fromMaybe (error $ "No sequence " ++ show geneTargetName) $ Bam.lookupTarget (BamIndex.idxHeader idxin) $ geneTargetName in bracket (Bam.openTamOutFile outname header) Bam.closeOutHandle $ \hout -> withFile outname2 WriteMode $ \hout2 -> do unless (Bam.targetSeqName header tid == geneTargetName) $ error "Bad target name" q <- BamIndex.query idxin tid geneTargetBounds loop (BamIndex.next q) $ \b -> do Bam.put1 hout b hPutStrLn hout2 . concat $ [ maybe "n/a" (BS.unpack . repr) . Bam.refSeqLoc $ b , "\t" , show b ] return outname parseBam :: Bam.Header -> Bam.Bam1 -> IO () parseBam hdr b = sequence_ [ verify (Bam.queryName b) (bamfields !! 0) , verify (Bam.targetName b) (Just $ bamfields !! 2) , verify (liftM (BS.pack . show . succ) . Bam.position $ b) (Just $ bamfields !! 3) , verify (Bam.querySeq b) (Just $ bamfields !! 9) ] where bamfields = BS.split '\t' . BS.pack . show $ b verify s1 s2 | s1 == s2 = return () | otherwise = error $ "Mismatch: " ++ show (s1, s2) loop :: (IO (Maybe a)) -> (a -> IO ()) -> IO () loop mi mo = go where go = mi >>= maybe (return ()) (\i -> mo i >> go) rawSystemE :: String -> [String] -> IO () rawSystemE prog args = rawSystem prog args >>= checkExit where checkExit ExitSuccess = return () checkExit (ExitFailure err) = error $ show (prog : args) ++ " => " ++ show err rawSystem_ :: String -> [String] -> IO () rawSystem_ prog args = rawSystem prog args >>= checkExit >> return () where checkExit ExitSuccess = return () checkExit (ExitFailure err) = hPutStrLn stderr $ show (prog : args) ++ " => " ++ show err rawSystemP :: String -> [String] -> IO () rawSystemP prog args = rawSystem prog args >>= checkExit >> return () where checkExit ExitSuccess = hPutStrLn stderr $ show (prog : args) ++ " => 0" checkExit (ExitFailure err) = hPutStrLn stderr $ show (prog : args) ++ " => " ++ show err