module Bio.BlastHTTP ( BlastHTTPQuery (..),
blastHTTP) where
import Network.HTTP.Conduit
import qualified Data.ByteString.Lazy.Char8 as L8
import qualified Data.ByteString.Char8 as B
import qualified Control.Monad as CM
import Bio.BlastXML
import Text.XML.HXT.Core
import Network
import Data.List
import Control.Concurrent
import Data.Maybe
import Bio.Core.Sequence
import Bio.Sequence.Fasta
import Network.HTTP.Base
data BlastHTTPQuery = BlastHTTPQuery
{ provider :: Maybe String
, program :: Maybe String
, database :: Maybe String
, querySequences :: [Sequence]
, optionalArguments :: Maybe String
, optionalWalltime :: Maybe Int
}
deriving (Show, Eq)
parseHTML :: String -> IOStateArrow s0 b0 XmlTree
parseHTML = readString [withParseHTML yes, withWarnings no]
atId :: ArrowXml a => String -> a XmlTree XmlTree
atId elementId = deep (isElem >>> hasAttrValue "id" (== elementId))
startSession :: String -> String -> String -> String -> Maybe String -> IO String
startSession provider' program' database' querySequences' optionalArguments'
| provider' == "ebi" = startSessionEBI program' database' querySequences' optionalArguments'
| otherwise = startSessionNCBI program' database' querySequences' optionalArguments'
startSessionEBI :: String -> String -> String -> Maybe String -> IO String
startSessionEBI program' database' querySequences' optionalArguments' = do
requestXml <- withSocketsDo
$ sendQueryEBI program' database' querySequences' optionalArguments'
let requestID = L8.unpack requestXml
return requestID
startSessionNCBI :: String -> String -> String -> Maybe String -> IO String
startSessionNCBI program' database' querySequences' optionalArguments' = do
requestXml <- withSocketsDo
$ sendQueryNCBI program' database' querySequences' optionalArguments'
let requestXMLString = L8.unpack requestXml
CM.liftM head (runX $ parseHTML requestXMLString //> atId "rid" >>> getAttrValue "value")
sendQueryEBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryEBI program' database' querySequences' _ = do
putStrLn "Making HTTP request"
res <- do
initReq <- parseUrl "http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/run"
let req = (flip urlEncodedBody) initReq $
[ ("email", "florian.eggenhofer@univie.ac.at")
, ("program", (B.pack program'))
, ("database", (B.pack database'))
, ("stype", "dna")
, ("sequence", (B.pack querySequences'))
]
withManager $ httpLbs req
{ method = "POST" }
putStrLn "EBI Response"
print res
putStrLn "EBI Response Body"
print (responseBody res)
return (responseBody res)
sendQueryNCBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryNCBI program' database' querySequences' optionalArguments'
| isJust optionalArguments' = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ fromJust optionalArguments' ++ "&QUERY=" ++ querySequences')
| otherwise = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ "&QUERY=" ++ querySequences')
retrieveSessionStatus :: String -> String -> IO String
retrieveSessionStatus provider' rid = do
if provider' == "ebi"
then do
statusXml <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/status/" ++ rid)
let statusXMLString = L8.unpack statusXml
putStrLn "EBI statusXMLString"
return statusXMLString
else do
statusXml <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=" ++ rid)
let statusXMLString = L8.unpack statusXml
return statusXMLString
retrieveResult :: String -> String -> IO (Either String BlastResult)
retrieveResult provider' rid = do
if provider' == "ebi"
then do
statusXml <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/result/" ++ rid ++ "/xml")
resultXML <- parseXML statusXml
return (Right resultXML)
else do
statusXml <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=XML&FORMAT_OBJECT=Alignment&CMD=Get")
resultXML <- parseXML statusXml
return (Right resultXML)
checkSessionStatus :: String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
checkSessionStatus provider' rid walltime consumedTime = do
threadDelay 120000000
status <- retrieveSessionStatus provider' rid
if (isNothing walltime)
then do
waitOrRetrieve provider' status rid walltime consumedTime
else do
if (consumedTime < (fromJust walltime))
then do
waitOrRetrieve provider' status rid walltime (consumedTime + 120000000)
else do
let exceptionMessage = "BLASTHTTP: Query did not return result within walltime"
return (Left exceptionMessage)
waitOrRetrieve :: String -> String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
waitOrRetrieve provider' status rid walltime consumedTime
| provider' == "ebi" = waitOrRetrieveEBI status rid walltime consumedTime
| otherwise = waitOrRetrieveNCBI status rid walltime consumedTime
waitOrRetrieveEBI :: String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
waitOrRetrieveEBI status rid walltime consumedTime
| "FINISHED" `isInfixOf` status = retrieveResult "ebi" rid
| "FAILURE" `isInfixOf` status = do
let exceptionMessage = "BLASTHTTP: The EBI blast job failed."
return (Left exceptionMessage)
| "ERROR" `isInfixOf` status = do
let exceptionMessage = "BLASTHTTP: An error occurred attempting to get the EBI blast job status."
return (Left exceptionMessage)
| "NOT_FOUND" `isInfixOf` status = do
let exceptionMessage = "BLASTHTTP: The EBI blast job cannot be found."
return (Left exceptionMessage)
| otherwise = checkSessionStatus "ebi" rid walltime consumedTime
waitOrRetrieveNCBI :: String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
waitOrRetrieveNCBI status rid walltime consumedTime
| "Status=READY" `isInfixOf` status = retrieveResult "ncbi" rid
| "Status=FAILURE" `isInfixOf` status = do
let exceptionMessage = "Search $rid failed; please report to blast-help at ncbi.nlm.nih.gov.\n"
return (Left exceptionMessage)
| "Status=UNKNOWN" `isInfixOf` status = do
let exceptionMessage = "Search $rid expired.\n"
return (Left exceptionMessage)
| "Status=WAITING" `isInfixOf` status = do
checkSessionStatus "ncbi" rid walltime consumedTime
| otherwise = do
let exceptionMessage = "Status has unexpected value " ++ status ++ " - aborting blast search\n"
return (Left exceptionMessage)
performQuery :: String -> String -> String -> [Sequence] -> Maybe String -> Maybe Int -> IO (Either String BlastResult)
performQuery provider' program' database' querySequences' optionalArgumentMaybe walltime
| null querySequences' = do
let exceptionMessage = "Error - no query sequence provided"
return (Left exceptionMessage)
| otherwise = do
let sequenceString = urlEncode (concatMap showSequenceString querySequences')
rid <- startSession provider' program' database' sequenceString optionalArgumentMaybe
checkSessionStatus provider' rid walltime (0 :: Int)
showSequenceString :: Sequence -> String
showSequenceString fastaSequence = sequenceString
where sequenceHeader = ">" ++ L8.unpack (unSL (seqheader fastaSequence)) ++ "\n"
sequenceData = L8.unpack (unSD (seqdata fastaSequence)) ++ "\n"
sequenceString = sequenceHeader ++ sequenceData
blastHTTP :: BlastHTTPQuery -> IO (Either String BlastResult)
blastHTTP (BlastHTTPQuery provider' program' database' querySequences' optionalArguments' walltime') = do
let defaultProvider = "ncbi"
let defaultProgram = "blastn"
let defaultDatabase = "refseq_genomic"
let defaultWalltime = Nothing
let selectedProvider = fromMaybe defaultProvider provider'
let selectedProgram = fromMaybe defaultProgram program'
let selectedDatabase = fromMaybe defaultDatabase database'
let selectedWalltime = maybe defaultWalltime Just walltime'
performQuery selectedProvider selectedProgram selectedDatabase querySequences' optionalArguments' selectedWalltime