module Bio.BlastHTTP ( BlastHTTPQuery (..),
blastHTTP) where
import Network.HTTP.Conduit
import Data.Conduit
import qualified Data.ByteString.Lazy.Char8 as L8
import Control.Monad.IO.Class (liftIO)
import qualified Control.Monad as CM
import Bio.BlastXML
import Text.XML.HXT.Core
import Network
import qualified Data.Conduit.List as CL
import Data.List
import Control.Monad.Error as CM
import Control.Concurrent
import Data.Maybe
import Data.Either
import Bio.Core.Sequence
data BlastHTTPQuery = BlastHTTPQuery
{ program :: Maybe String
, database :: Maybe String
, querySequence :: Maybe SeqData
, entrezQuery :: Maybe String }
deriving (Show, Eq)
parseHTML :: String -> IOStateArrow s0 b0 XmlTree
parseHTML = readString [withParseHTML yes, withWarnings no]
atName :: ArrowXml a => String -> a XmlTree XmlTree
atName elementId = deep (isElem >>> hasAttrValue "name" (== elementId))
atId :: ArrowXml a => String -> a XmlTree XmlTree
atId elementId = deep (isElem >>> hasAttrValue "id" (== elementId))
startSession :: String -> String -> String -> Maybe String -> IO String
startSession program database querySequence entrezQuery = do
requestXml <- withSocketsDo
$ sendEntrezQuery program database querySequence entrezQuery
let requestXMLString = L8.unpack requestXml
CM.liftM head (runX $ parseHTML requestXMLString //> atId "rid" >>> getAttrValue "value")
sendEntrezQuery :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendEntrezQuery program database querySequence entrezQuery
| isJust entrezQuery = simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program ++ "&DATABASE=" ++ database ++ "&QUERY=" ++ querySequence ++ "&ENTREZ_QUERY=" ++ fromJust entrezQuery)
| otherwise = simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program ++ "&DATABASE=" ++ database ++ "&QUERY=" ++ querySequence)
retrieveSessionStatus :: String -> IO String
retrieveSessionStatus rid = do
statusXml <- withSocketsDo
$ simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=" ++ rid)
let statusXMLString = L8.unpack statusXml
return statusXMLString
retrieveResult :: String -> IO (Either String BlastResult)
retrieveResult rid = do
statusXml <- withSocketsDo
$ simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=XML&FORMAT_OBJECT=Alignment&CMD=Get")
resultXML <- parseXML statusXml
return (Right resultXML)
checkSessionStatus :: String -> Int -> IO (Either String BlastResult)
checkSessionStatus rid counter = do
let counter2 = counter + 1
let counter2string = show counter2
threadDelay 60000000
status <- retrieveSessionStatus rid
waitOrRetrieve status rid counter2
waitOrRetrieve :: String -> String -> Int -> IO (Either String BlastResult)
waitOrRetrieve status rid counter
| "Status=READY" `isInfixOf` status = retrieveResult rid
| "Status=FAILURE" `isInfixOf` status = do
let exceptionMessage = "Search $rid failed; please report to blast-help at ncbi.nlm.nih.gov.\n"
return (Left exceptionMessage)
| "Status=UNKNOWN" `isInfixOf` status = do
let exceptionMessage = "Search $rid expired.\n"
return (Left exceptionMessage)
| otherwise = checkSessionStatus rid counter
performQuery :: String -> String -> Maybe SeqData -> Maybe String -> Int -> IO (Either String BlastResult)
performQuery program database querySequenceMaybe entrezQueryMaybe counter
| isJust querySequenceMaybe = do
rid <- startSession program database (L8.unpack (unSD (fromJust querySequenceMaybe))) entrezQueryMaybe
checkSessionStatus rid counter
| otherwise = do
let exceptionMessage = "Error - no query sequence provided"
return (Left exceptionMessage)
blastHTTP :: BlastHTTPQuery -> IO (Either String BlastResult)
blastHTTP (BlastHTTPQuery program database querySequence entrezQuery) = do
let counter = 1
let defaultProgram = "blastn"
let defaultDatabase = "refseq_genomic"
let selectedProgram = fromMaybe defaultProgram program
let selectedDatabase = fromMaybe defaultDatabase database
performQuery selectedProgram selectedDatabase querySequence entrezQuery counter