{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}

-- | Parses NCBI BLAST+ output (tabular and JSON2 formats)

module Biobase.BLAST.Import (blastCmdJSON2FromFile,
                             parseJSONBlast,
                             blastFromFile,
                             parseTabularBlasts,
                             parseTabularHTTPBlasts,
                             blastHTTPFromFile
                            ) where

import Prelude hiding (takeWhile)
import Data.Attoparsec.ByteString.Char8 hiding (isSpace)
import qualified Data.Attoparsec.ByteString.Lazy as L
import qualified Data.ByteString.Char8 as C
import qualified Data.ByteString.Builder as S
import qualified Data.ByteString.Lazy.Char8 as B
import qualified Data.Vector as V
import System.Directory
import Data.Char
import Control.Monad
import Debug.Trace
import Text.Printf
import Biobase.BLAST.Types
import qualified Data.Aeson as A

-- | Reads a BLAST JSON2 result from the provided file path and decodes it.
--
-- A progress line naming the file is printed to stdout first. The result is
-- @Left@ with the decoder's message when the file content cannot be decoded.
-- If the file does not exist, 'error' is raised with a message that names
-- the missing path.
blastCmdJSON2FromFile :: String -> IO (Either String BlastCmdJSON2)
blastCmdJSON2FromFile filePath = do
  printf "# reading blast JSON2 input from file %s\n" filePath
  blastFileExists <- doesFileExist filePath
  if blastFileExists
    then parseJSONBlastCmd <$> B.readFile filePath
    -- The message has to be rendered with printf before it is handed to
    -- 'error'; otherwise the "%s" placeholder is reported verbatim.
    else error (printf "# JSON2 blast file \"%s\" does not exist\n" filePath)

-- | Decode a lazy bytestring as 'BlastCmdJSON2' using its 'A.FromJSON'
-- instance; a decoding failure is reported as @Left@ with aeson's message.
parseJSONBlastCmd :: B.ByteString -> Either String BlastCmdJSON2
parseJSONBlastCmd = A.eitherDecode

-- | Decode a lazy bytestring as 'BlastJSON2' using its 'A.FromJSON'
-- instance; a decoding failure is reported as @Left@ with aeson's message.
parseJSONBlast :: B.ByteString -> Either String BlastJSON2
parseJSONBlast = A.eitherDecode

-- | Reads and parses a tabular BLAST result from the provided file path.
--
-- A progress line naming the file is printed to stdout first. The file is
-- read as a lazy bytestring and handed to 'parseTabularBlasts'. If the file
-- does not exist, 'error' is raised with a message that names the missing
-- path.
blastFromFile :: String -> IO [BlastTabularResult]
blastFromFile filePath = do
  printf "# reading tabular blast input from file %s\n" filePath
  blastFileExists <- doesFileExist filePath
  if blastFileExists
    then parseTabularBlasts <$> B.readFile filePath
    -- The message has to be rendered with printf before it is handed to
    -- 'error'; otherwise the "%s" placeholder is reported verbatim.
    else error (printf "# tabular blast file \"%s\" does not exist\n" filePath)

-- | Reads and parses a tabular HTTP BLAST result from the provided file path.
--
-- A progress line naming the file is printed to stdout first. The file is
-- read as a lazy bytestring and handed to 'parseTabularHTTPBlasts'. If the
-- file does not exist, 'error' is raised with a message that names the
-- missing path.
blastHTTPFromFile :: String -> IO [BlastTabularResult]
blastHTTPFromFile filePath = do
  printf "# reading tabular blast input from file %s\n" filePath
  blastFileExists <- doesFileExist filePath
  if blastFileExists
    then parseTabularHTTPBlasts <$> B.readFile filePath
    -- The message has to be rendered with printf before it is handed to
    -- 'error'; otherwise the "%s" placeholder is reported verbatim.
    else error (printf "# tabular blast file \"%s\" does not exist\n" filePath)

-- | Read a lazy bytestring and stream out a list of @BlastTabularResult@s.
--
-- One result is parsed at a time and the remaining input is parsed only when
-- the tail of the list is demanded. Consequently, if there is a parse error
-- "late" in the input, some (or many!) results may already have been
-- streamed out before 'error' is raised.
parseTabularBlasts :: B.ByteString -> [BlastTabularResult]
parseTabularBlasts input =
  case L.parse genParseTabularBlast input of
    L.Done rest result
      -- Nothing left after this result: it is the final element.
      | B.null rest -> [result]
      | otherwise -> result : parseTabularBlasts rest
    L.Fail rest contexts message ->
      error $
        concat
          [ "parseTabularBlasts failed! "
          , message
          , " ctxt: "
          , show contexts
          , " head of remaining input: "
          , B.unpack (B.take 1000 rest)
          ]

-- | Read a lazy bytestring holding tabular HTTP BLAST output and stream out
-- a list of @BlastTabularResult@s.
--
-- One result is parsed at a time and the remaining input is parsed only when
-- the tail of the list is demanded, so a parse error late in the input
-- raises 'error' only after the earlier results have been produced.
parseTabularHTTPBlasts :: B.ByteString -> [BlastTabularResult]
parseTabularHTTPBlasts input =
  case L.parse genParseTabularHTTPBlast input of
    L.Done rest result
      -- Nothing left after this result: it is the final element.
      | B.null rest -> [result]
      | otherwise -> result : parseTabularHTTPBlasts rest
    L.Fail rest contexts message ->
      error $
        concat
          [ "parseTabularHTTPBlasts failed! "
          , message
          , " ctxt: "
          , show contexts
          , " head of remaining input: "
          , B.unpack (B.take 1000 rest)
          ]

-- | Parse the start of a result header: the literal @# BLAST@ or @# blast@
-- followed by one character that selects the program (case-insensitive):
-- @x@ gives 'BlastX', @p@ gives 'BlastP', @n@ gives 'BlastN'.
--
-- Any other character makes the parser fail with a descriptive message, so
-- an unsupported program shows up as an ordinary parse failure rather than
-- as a pattern-match exception hidden inside the parsed result.
genParseBlastProgram :: Parser BlastProgram
genParseBlastProgram = do
  _ <- choice [string "# BLAST", string "# blast"]
  programLetter <- toLower <$> anyChar
  case programLetter of
    'x' -> return BlastX
    'p' -> return BlastP
    'n' -> return BlastN
    other -> fail ("unsupported BLAST program letter: " ++ show other)

-- | Parse one tabular BLAST result: the commented header lines (program,
-- query, database, optional field lines, hit count) followed by exactly as
-- many hit lines as the header announces.
genParseTabularBlast :: Parser BlastTabularResult
genParseTabularBlast = do
  program <- genParseBlastProgram <?> "Program"
  -- Skip the remainder of the program line; at least one character must
  -- follow the program letter.
  _ <- many1 (notChar '\n')
  endOfLine
  _ <- string "# Query: " <?> "Query"
  -- The query id is the first whitespace-free token; the rest of the line
  -- is consumed and dropped.
  queryId <- (takeWhile (not . isSpace) <* manyTill anyChar endOfLine) <?> "QueryId"
  _ <- string "# Database: " <?> "Database"
  database <- many1 (notChar '\n') <?> "Db"
  _ <- string "\n# " <?> "header linebreak"
  -- Zero or more "Fields:" lines may precede the hit count.
  skipMany (try genParseFieldLine) <?> "Fields"
  hitCount <- decimal <?> "Hit number"
  _ <- string " hits found\n" <?> "hits found"
  hits <- count hitCount (try genParseBlastTabularHit) <?> "Tabular hit"
  -- Swallow any blank lines separating this result from the next one.
  skipMany endOfLine
  return
    ( BlastTabularResult
        program
        (toLB queryId)
        (B.pack database)
        hitCount
        (V.fromList hits)
    )

-- | Parse one tabular BLAST result as delivered by NCBI HTTP requests.
--
-- Compared to 'genParseTabularBlast', the program line must end directly
-- after the program letter, and the header carries additional
-- @# Iteration: @ and @# RID: @ lines whose values are read and discarded.
genParseTabularHTTPBlast :: Parser BlastTabularResult
genParseTabularHTTPBlast = do
  program <- genParseBlastProgram <?> "Program"
  endOfLine
  _ <- string "# Iteration: " <?> "Iteration"
  _ <- leadingToken <?> "IterationNumber"
  _ <- string "# Query: " <?> "Query"
  queryId <- leadingToken <?> "QueryId"
  _ <- string "# RID: " <?> "RID"
  _ <- leadingToken <?> "RID"
  _ <- string "# Database: " <?> "Database"
  database <- many1 (notChar '\n') <?> "Db"
  _ <- string "\n# " <?> "header linebreak"
  -- Zero or more "Fields:" lines may precede the hit count.
  skipMany (try genParseFieldLine) <?> "Fields"
  hitCount <- decimal <?> "Hit number"
  _ <- string " hits found\n" <?> "hits found"
  hits <- count hitCount (try genParseBlastHTTPTabularHit) <?> "Tabular hit"
  -- Swallow any blank lines separating this result from the next one.
  skipMany endOfLine
  return
    ( BlastTabularResult
        program
        (toLB queryId)
        (B.pack database)
        hitCount
        (V.fromList hits)
    )
  where
    -- First whitespace-free token of the current line; the rest of the
    -- line, including its line ending, is consumed and dropped.
    leadingToken = takeWhile (not . isSpace) <* manyTill anyChar endOfLine

-- | Consume a @Fields:@ header line together with the @"\\n# "@ prefix of
-- the line that follows it. The leading @"# "@ of the fields line itself is
-- expected to have been consumed by the caller.
genParseFieldLine :: Parser ()
genParseFieldLine =
  string "Fields:"
    *> skipMany (notChar '\n')
    *> void (string "\n# ")

-- | Parse one hit line of tabular BLAST output: fifteen tab-separated
-- columns terminated by a newline.
--
-- Columns, in order: query id, subject id, sequence identity, alignment
-- length, mismatches, gap opens, query start, query end, subject start,
-- subject end, e-value, bit score, subject frame, query sequence and
-- subject sequence.
--
-- The subject frame is parsed as a signed integer so that an explicit sign
-- on the frame is accepted. Each column has its own context label so that
-- a parse failure names the column that could not be read.
genParseBlastTabularHit :: Parser BlastTabularHit
genParseBlastTabularHit = do
  queryId <- tabField <?> "hit qid"
  tab
  subjectId <- tabField <?> "hit sid"
  tab
  seqIdentity <- double <?> "hit seqid"
  tab
  alignmentLength <- decimal <?> "hit alen"
  tab
  misMatches <- decimal <?> "hit mmatch"
  tab
  gapOpenScore <- decimal <?> "hit gopen"
  tab
  queryStart <- decimal <?> "hit qstart"
  tab
  queryEnd <- decimal <?> "hit qend"
  tab
  hitSeqStart <- decimal <?> "hit sstart"
  tab
  hitSeqEnd <- decimal <?> "hit send"
  tab
  eValue <- double <?> "hit eval"
  tab
  bitScore <- double <?> "hit bs"
  tab
  subjectFrame <- signed decimal <?> "hit sF"
  tab
  querySeq <- tabField <?> "hit qseq"
  tab
  -- The last column runs up to the end of the line.
  subjectSeq <- takeWhile1 (/= '\n') <?> "hit subSeq"
  _ <- char '\n'
  return $
    BlastTabularHit
      (B.fromStrict queryId)
      (B.fromStrict subjectId)
      seqIdentity
      alignmentLength
      misMatches
      gapOpenScore
      queryStart
      queryEnd
      hitSeqStart
      hitSeqEnd
      eValue
      bitScore
      subjectFrame
      (B.fromStrict querySeq)
      (B.fromStrict subjectSeq)
  where
    -- A non-empty column: everything up to the next tab.
    tabField = takeWhile1 (/= '\t')
    -- The column separator.
    tab = void (char '\t')

-- | Parse one hit line of tabular BLAST output as delivered by NCBI HTTP
-- requests: fourteen tab-separated columns terminated by a newline.
--
-- Columns, in order: query id, subject id, two further columns that are
-- read and discarded (labelled "redundant id1" and "redundant id2"),
-- sequence identity, alignment length, mismatches, gap opens, query start,
-- query end, subject start, subject end, e-value and bit score.
--
-- This format carries no subject frame or sequences, so the resulting hit
-- gets a frame of 0 and empty query/subject sequences. Each column has its
-- own context label so that a parse failure names the column that could
-- not be read.
genParseBlastHTTPTabularHit :: Parser BlastTabularHit
genParseBlastHTTPTabularHit = do
  queryId <- tabField <?> "hit qid"
  tab
  subjectId <- tabField <?> "hit sid"
  tab
  _ <- tabField <?> "redundant id1"
  tab
  _ <- tabField <?> "redundant id2"
  tab
  seqIdentity <- double <?> "hit seqid"
  tab
  alignmentLength <- decimal <?> "hit alen"
  tab
  misMatches <- decimal <?> "hit mmatch"
  tab
  gapOpenScore <- decimal <?> "hit gopen"
  tab
  queryStart <- decimal <?> "hit qstart"
  tab
  queryEnd <- decimal <?> "hit qend"
  tab
  hitSeqStart <- decimal <?> "hit sstart"
  tab
  hitSeqEnd <- decimal <?> "hit send"
  tab
  eValue <- double <?> "hit eval"
  tab
  bitScore <- double <?> "hit bs"
  _ <- char '\n'
  return $
    BlastTabularHit
      (B.fromStrict queryId)
      (B.fromStrict subjectId)
      seqIdentity
      alignmentLength
      misMatches
      gapOpenScore
      queryStart
      queryEnd
      hitSeqStart
      hitSeqEnd
      eValue
      bitScore
      0
      B.empty
      B.empty
  where
    -- A non-empty column: everything up to the next tab.
    tabField = takeWhile1 (/= '\t')
    -- The column separator.
    tab = void (char '\t')

-- | Convert the text of a BLAST e-value into a 'Double'.
--
-- BLAST e-values can be reported without a leading zero, e.g. @.051e-22@,
-- which is not supported by double parsing; such values get a @0@ prepended
-- before they are read.
--
-- Raises 'error' with a message quoting the offending text when the input
-- is empty or is not a number.
readEvalue :: C.ByteString -> Double
readEvalue eValBs =
  case reads normalized of
    -- Like 'read', tolerate trailing whitespace after the number.
    [(value, rest)] | all isSpace rest -> value
    _ -> error ("readEvalue: cannot parse e-value " ++ show raw)
  where
    raw = C.unpack eValBs
    normalized = case raw of
      '.' : _ -> '0' : raw
      _ -> raw

-- | Predicate accepting the IUPAC amino acid letters used here
-- (upper case only) and the gap character @-@.
aminoacidLetters :: Char -> Bool
aminoacidLetters = inClass alphabet
  where
    -- '-' is last so that 'inClass' reads it literally, not as a range.
    alphabet = "ARNDCQEGHILMFPSTWYVBZX-"

-- | Predicate accepting the IUPAC nucleic acid letters (upper case only)
-- together with the gap characters @.@ and @-@.
nucleotideLetters :: Char -> Bool
-- 'inClass' treats "a-b" as a character range, so a literal '-' must be the
-- first or last character of the set. With '-' between 'N' and '.', the
-- sequence "N-." would be read as an empty range and 'N', '-' and '.' would
-- all be rejected.
nucleotideLetters = inClass "AGTCURYSWKMBDHVN.-"

-- | Predicate accepting every upper-case letter @A@ to @Z@ together with
-- the characters @.@ and @-@.
bioLetters :: Char -> Bool
bioLetters = inClass alphabet
  where
    -- '-' is last so that 'inClass' reads it literally, not as a range.
    alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ.-"


-- | Convert a strict bytestring into a lazy one by running it through a
-- 'S.Builder'.
toLB :: C.ByteString -> B.ByteString
toLB strictBytes = S.toLazyByteString (S.byteString strictBytes)