{-# LANGUAGE OverloadedStrings #-}

-- | Searches a provided sequence with the NCBI Blast REST service and returns a blast result in xml format as BlastResult.
--
-- The function blastHTTP takes the BlastHTTPQuery datatype as argument, which contains following elements: 
--
-- 1. program:  Selects the blast-program to be used for the query. Example values are blastn, blastp, blastx,.. If Nothing is used as argument the function will default to blastn. Type: Maybe String
--
-- 2. database: Selects the database to be queried against. Example values are refseq_genomic, nr, est,.. Please consider that the database must be chosen in accordance with the blastprogram. Default value: refseq_genomic. Type: Maybe String
--
-- 3. querySequences: nucleotides or protein sequences, depending on the blast program used. If no sequence is provided an exception as String will be produced. Type: [Sequence]
--
-- 4. optionalArguments: This argument is optional and will filter the result if provided. Type: Maybe String
--
-- 5. optionalWalltime: Optional walltime in mircroseconds. If specified, will terminate the query after reaching the timelimit and return Left. Type: Maybe Int
--
-- and returns Either a BlastResult (Right) on success or an exception as String (Left)
--
-- If you plan to submit more than 20 searches in one session, please look up the Usage Guidelines in the webservice information <http://www.ncbi.nlm.nih.gov/BLAST/developer.shtml>.
module Bio.BlastHTTP ( BlastHTTPQuery (..),
                       blastHTTP) where

import Network.HTTP.Conduit    
import qualified Data.ByteString.Lazy.Char8 as L8 
import qualified Data.ByteString.Char8 as B
import qualified Control.Monad as CM
import Bio.BlastXML 
import Text.XML.HXT.Core
import Network
import Data.List
import Control.Concurrent
import Data.Maybe
import Bio.Core.Sequence
import Bio.Sequence.Fasta
import Network.HTTP.Base

data BlastHTTPQuery = BlastHTTPQuery 
  { provider :: Maybe String
  , program :: Maybe String
  , database :: Maybe String
  , querySequences :: [Sequence]
  , optionalArguments :: Maybe String
  , optionalWalltime :: Maybe Int
  }
  deriving (Show, Eq)

-- | Parse HTML results into Xml Tree datastructure
parseHTML :: String -> IOStateArrow s0 b0 XmlTree
parseHTML = readString [withParseHTML yes, withWarnings no] 

-- | Gets all subtrees with the specified id attribute
atId :: ArrowXml a =>  String -> a XmlTree XmlTree
atId elementId = deep (isElem >>> hasAttrValue "id" (== elementId))
      
-- | Send query and parse RID from retrieved HTML 
startSession :: String -> String -> String -> String -> Maybe String -> IO String
startSession provider' program' database' querySequences' optionalArguments'
  | provider' == "ebi" = startSessionEBI program' database' querySequences' optionalArguments' 
  | otherwise = startSessionNCBI program' database' querySequences' optionalArguments' 

startSessionEBI :: String -> String -> String -> Maybe String -> IO String
startSessionEBI  program' database' querySequences' optionalArguments' = do
  requestXml <- withSocketsDo
      $ sendQueryEBI program' database' querySequences' optionalArguments'
  let requestID = L8.unpack requestXml
  return requestID

startSessionNCBI :: String -> String -> String -> Maybe String -> IO String
startSessionNCBI program' database' querySequences' optionalArguments' = do
  requestXml <- withSocketsDo
      $ sendQueryNCBI program' database' querySequences' optionalArguments'
  let requestXMLString = L8.unpack requestXml
  CM.liftM head (runX $ parseHTML requestXMLString //> atId "rid" >>> getAttrValue "value")

-- | Send query with or without optional arguments and return response HTML
sendQueryEBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryEBI program' database' querySequences' _ = do
  putStrLn "Making HTTP request"
  res <- do
    --initReq <- parseUrl "http://postcatcher.in/catchers/541811052cb53502000001a7"
    initReq <- parseUrl "http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/run"
    let req = (flip urlEncodedBody) initReq $
             [ ("email", "florian.eggenhofer@univie.ac.at")
             , ("program", (B.pack program'))
             , ("database", (B.pack database'))
             , ("stype", "dna")
             , ("sequence", (B.pack querySequences'))
             ]
    withManager $ httpLbs req
        { method = "POST" }
  putStrLn "EBI Response"
  print res
  putStrLn "EBI Response Body"
  print (responseBody res)
  return (responseBody res) 

-- | Send query with or without optional arguments and return response HTML
sendQueryNCBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryNCBI program' database' querySequences' optionalArguments'
  | isJust optionalArguments' = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ fromJust optionalArguments' ++ "&QUERY=" ++ querySequences')
  | otherwise = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ "&QUERY=" ++ querySequences')
         
-- | Retrieve session status with RID
retrieveSessionStatus :: String -> String -> IO String 
retrieveSessionStatus provider' rid = do
  if provider' == "ebi"
     then do
       statusXml <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/status/" ++ rid)
       let statusXMLString = L8.unpack statusXml
       putStrLn "EBI statusXMLString"
       return statusXMLString
     else do
       statusXml <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=" ++ rid)
       let statusXMLString = L8.unpack statusXml
       return statusXMLString
  
-- | Retrieve result in blastxml format with RID 
retrieveResult :: String -> String -> IO (Either String BlastResult)
retrieveResult provider' rid = do
  if provider' == "ebi"
     then do
       statusXml <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/result/" ++ rid ++ "/xml")
       resultXML <- parseXML statusXml
       return (Right resultXML)
     else do
       statusXml <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=XML&FORMAT_OBJECT=Alignment&CMD=Get")
       resultXML <- parseXML statusXml
       return (Right resultXML)
 
-- | Check if job results are ready and then retrieves results
--   If a walltime in microseconds was set query retrieval will termiate after it is consumed and return a Left result
checkSessionStatus :: String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
checkSessionStatus provider' rid walltime consumedTime = do
    threadDelay 120000000
    status <- retrieveSessionStatus provider' rid
    if (isNothing walltime)
       then do
         waitOrRetrieve provider' status rid walltime consumedTime
       else do
         if (consumedTime < (fromJust walltime))
           then do 
             waitOrRetrieve provider' status rid walltime (consumedTime + 120000000)
           else do 
             let exceptionMessage = "BLASTHTTP: Query did not return result within walltime"
             return (Left exceptionMessage)

waitOrRetrieve :: String -> String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
waitOrRetrieve provider' status rid walltime consumedTime
  | provider' == "ebi" = waitOrRetrieveEBI status rid walltime consumedTime
  | otherwise = waitOrRetrieveNCBI status rid walltime consumedTime

waitOrRetrieveEBI :: String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
waitOrRetrieveEBI status rid walltime consumedTime
  | "FINISHED" `isInfixOf` status = retrieveResult "ebi" rid
  | "FAILURE" `isInfixOf` status = do
      let exceptionMessage = "BLASTHTTP: The EBI blast job failed."
      return (Left exceptionMessage)
  | "ERROR" `isInfixOf` status = do
      let exceptionMessage = "BLASTHTTP: An error occurred attempting to get the EBI blast job status."
      return (Left exceptionMessage)
  | "NOT_FOUND" `isInfixOf` status = do
      let exceptionMessage = "BLASTHTTP: The EBI blast job cannot be found."
      return (Left exceptionMessage)
-- RUNNING
  | otherwise = checkSessionStatus "ebi" rid walltime consumedTime

waitOrRetrieveNCBI :: String -> String -> Maybe Int -> Int -> IO (Either String BlastResult)
waitOrRetrieveNCBI status rid walltime consumedTime
  | "Status=READY" `isInfixOf` status = retrieveResult "ncbi" rid
  | "Status=FAILURE" `isInfixOf` status = do
      let exceptionMessage = "Search $rid failed; please report to blast-help at ncbi.nlm.nih.gov.\n"
      return (Left exceptionMessage)
  | "Status=UNKNOWN" `isInfixOf` status = do
      let exceptionMessage = "Search $rid expired.\n"
      return (Left exceptionMessage)
  | "Status=WAITING" `isInfixOf` status = do
      checkSessionStatus "ncbi" rid walltime consumedTime
  --Unexpected status, return Left
  | otherwise = do
      let exceptionMessage = "Status has unexpected value " ++ status ++ " - aborting blast search\n"
      return (Left exceptionMessage)

-- | Sends Query and retrieves result on reaching READY status, will return exeption message if no query sequence has been provided 
performQuery :: String -> String -> String -> [Sequence] -> Maybe String -> Maybe Int -> IO (Either String BlastResult)                               
performQuery provider' program' database' querySequences' optionalArgumentMaybe walltime
  | null querySequences' = do 
      let exceptionMessage = "Error - no query sequence provided"
      return (Left exceptionMessage)
  | otherwise = do
     let sequenceString = urlEncode (concatMap showSequenceString querySequences')
     rid <- startSession provider' program' database' sequenceString optionalArgumentMaybe
     checkSessionStatus provider' rid walltime (0 :: Int)

showSequenceString :: Sequence -> String
showSequenceString fastaSequence = sequenceString
  where sequenceHeader = ">" ++ L8.unpack (unSL (seqheader fastaSequence)) ++ "\n"
        sequenceData = L8.unpack (unSD (seqdata fastaSequence)) ++ "\n"
        sequenceString = sequenceHeader ++ sequenceData

-- | Retrieve Blast results in BlastXML format from the NCBI REST Blast interface
-- The querySequence has to be provided, all other parameters are optional and can be set to Nothing
-- optionalArguments is attached to the query as is .e.g: "&ALIGNMENTS=250"
blastHTTP :: BlastHTTPQuery -> IO (Either String BlastResult)
blastHTTP (BlastHTTPQuery provider' program' database' querySequences' optionalArguments' walltime') = do
  let defaultProvider = "ncbi"
  let defaultProgram = "blastn"
  let defaultDatabase = "refseq_genomic"   
  let defaultWalltime = Nothing
  let selectedProvider = fromMaybe defaultProvider provider'
  let selectedProgram = fromMaybe defaultProgram program'
  let selectedDatabase = fromMaybe defaultDatabase database'  
  let selectedWalltime = maybe defaultWalltime Just walltime'
  --walltime of 1h in microseconds
  --let walltime = Just (7200000000 ::Int)
  performQuery selectedProvider selectedProgram selectedDatabase querySequences' optionalArguments' selectedWalltime