{-# LANGUAGE OverloadedStrings #-} -- | Searches a provided sequence with the NCBI Blast REST service and returns a blast result in xml format as BlastResult. -- -- The function blastHTTP takes the BlastHTTPQuery datatype as argument, which contains following elements: -- -- 1. program: Selects the blast-program to be used for the query. Example values are blastn, blastp, blastx,.. If Nothing is used as argument the function will default to blastn. Type: Maybe String -- -- 2. database: Selects the database to be queried against. Example values are refseq_genomic, nr, est,.. Please consider that the database must be chosen in accordance with the blastprogram. Default value: refseq_genomic. Type: Maybe String -- -- 3. querySequence: nucleotides or protein sequence, depending on the blast program used. If no sequence is provided an exception as String will be produced. Type: Maybe SeqData -- -- 4. entrezQuery: This argument is optional and will filter the result if provided. Type: Maybe String -- -- and returns Either a BlastResult (Right) on success or an exception as String (Left) -- -- If you plan to submit more than 20 searches in one session, please look up the Usage Guidelines in the webservice information . module Bio.BlastHTTP ( BlastHTTPQuery (..), blastHTTP) where import Network.HTTP.Conduit import Data.Conduit import qualified Data.ByteString.Lazy.Char8 as L8 import Control.Monad.IO.Class (liftIO) import qualified Control.Monad as CM import Bio.BlastXML import Text.XML.HXT.Core import Network import qualified Data.Conduit.List as CL import Data.List import Control.Monad.Error as CM import Control.Concurrent import Data.Maybe import Data.Either import Bio.Core.Sequence data BlastHTTPQuery = BlastHTTPQuery { program :: Maybe String , database :: Maybe String , querySequence :: Maybe SeqData , optionalArguments :: Maybe String } deriving (Show, Eq) -- | Parse HTML results into Xml Tree datastructure parseHTML :: String -> IOStateArrow s0 b0 XmlTree parseHTML = readString [withParseHTML yes, withWarnings no] -- | Gets all subtrees with the specified id attribute atName :: ArrowXml a => String -> a XmlTree XmlTree atName elementId = deep (isElem >>> hasAttrValue "name" (== elementId)) -- | Gets all subtrees with the specified id attribute atId :: ArrowXml a => String -> a XmlTree XmlTree atId elementId = deep (isElem >>> hasAttrValue "id" (== elementId)) -- | Send query and parse RID from retrieved HTML startSession :: String -> String -> String -> Maybe String -> IO String startSession program database querySequence optionalArguments = do requestXml <- withSocketsDo $ sendQuery program database querySequence optionalArguments let requestXMLString = L8.unpack requestXml CM.liftM head (runX $ parseHTML requestXMLString //> atId "rid" >>> getAttrValue "value") -- | Send query with or without optional arguments and return response HTML sendQuery :: String -> String -> String -> Maybe String -> IO L8.ByteString sendQuery program database querySequence optionalArguments | isJust optionalArguments = simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program ++ "&DATABASE=" ++ database ++ fromJust optionalArguments ++ "&QUERY=" ++ querySequence) | otherwise = simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program ++ "&DATABASE=" ++ database ++ "&QUERY=" ++ querySequence) -- | Retrieve session status with RID retrieveSessionStatus :: String -> IO String retrieveSessionStatus rid = do statusXml <- withSocketsDo $ simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=" ++ rid) let statusXMLString = L8.unpack statusXml return statusXMLString -- | Retrieve result in blastxml format with RID retrieveResult :: String -> IO (Either String BlastResult) retrieveResult rid = do statusXml <- withSocketsDo $ simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=XML&FORMAT_OBJECT=Alignment&CMD=Get") resultXML <- parseXML statusXml return (Right resultXML) -- | Check if job results are ready and then retrieves results checkSessionStatus :: String -> IO (Either String BlastResult) checkSessionStatus rid = do threadDelay 60000000 status <- retrieveSessionStatus rid waitOrRetrieve status rid waitOrRetrieve :: String -> String -> IO (Either String BlastResult) waitOrRetrieve status rid | "Status=READY" `isInfixOf` status = retrieveResult rid | "Status=FAILURE" `isInfixOf` status = do let exceptionMessage = "Search $rid failed; please report to blast-help at ncbi.nlm.nih.gov.\n" return (Left exceptionMessage) | "Status=UNKNOWN" `isInfixOf` status = do let exceptionMessage = "Search $rid expired.\n" return (Left exceptionMessage) | otherwise = checkSessionStatus rid -- | Sends Query and retrieves result on reaching READY status, will return exeption message if no query sequence has been provided performQuery :: String -> String -> Maybe SeqData -> Maybe String -> IO (Either String BlastResult) performQuery program database querySequenceMaybe optionalArgumentMaybe | isJust querySequenceMaybe = do rid <- startSession program database (L8.unpack (unSD (fromJust querySequenceMaybe))) optionalArgumentMaybe checkSessionStatus rid | otherwise = do let exceptionMessage = "Error - no query sequence provided" return (Left exceptionMessage) -- | Retrieve Blast results in BlastXML format from the NCBI REST Blast interface -- The querySequence has to be provided, all other parameters are optional and can be set to Nothing -- optionalArguments is attached to the query as is .e.g: "&ALIGNMENTS=250" blastHTTP :: BlastHTTPQuery -> IO (Either String BlastResult) blastHTTP (BlastHTTPQuery program database querySequence optionalArguments) = do let defaultProgram = "blastn" let defaultDatabase = "refseq_genomic" let selectedProgram = fromMaybe defaultProgram program let selectedDatabase = fromMaybe defaultDatabase database performQuery selectedProgram selectedDatabase querySequence optionalArguments