{- Blast Gregory W. Schwartz Collects the functions pertaining to Blasting sequences. -} {-# LANGUAGE OverloadedStrings #-} module Blast ( filterBlastBamRows , blastBamToFasta ) where -- Standard import Data.Maybe import qualified Data.Set as Set -- Cabal import qualified Control.Foldl as Fold import qualified Data.Text as T import Safe import Turtle import Turtle.Line -- Local import Types -- | Get the accessions that successfully align to the reference. getBlastSet :: Command -> Maybe Args -> Subject -> Query -> Shell BlastSet getBlastSet (Command cmd) args (Subject sub) query = do let queryIn (QueryFile x) = input x queryIn (QueryShell x) = x argsIn = maybe [] (fmap T.pack . words . unArgs) args blastOutput = fmap fromJust . mfilter isJust . fmap (headMay . cut "\t" . lineToText) . inproc cmd ( [ "-subject", format fp sub , "-outfmt", "6" , "-max_target_seqs", "1" ] <> argsIn ) . queryIn $ query blastSet <- fmap (BlastSet . Set.fromList) . fold blastOutput $ Fold.list return blastSet -- | Filter out BamRows that do not successfully align to the reference. filterBlastBamRows :: Int -> Command -> Maybe Args -> Subject -> Query -> [BamRow] -> Shell [BamRow] filterBlastBamRows accField cmd args sub query rows = do blastSet <- getBlastSet cmd args sub $ query let newRows = filter (flip Set.member (unBlastSet blastSet) . (`at` accField) . unBamRow) rows return newRows -- | Convert a bam row to fasta for a quick blast check. blastBamToFasta :: BamRow -> [T.Text] blastBamToFasta (BamRow row) = [ ">" <> (fromMaybe (error "No accession in bam.") . headMay $ row) , fromMaybe (error "No sequence in bam.") $ row `atMay` 9 ]