{-# LANGUAGE OverloadedStrings #-}

-- | Searches a provided sequence with the NCBI Blast REST service and returns a blast result in xml format as BlastResult.
--
-- The function blastHTTP takes the BlastHTTPQuery datatype as argument, which contains following elements:
--
-- 1. program:  Selects the blast-program to be used for the query. Example values are blastn, blastp, blastx,.. If Nothing is used as argument the function will default to blastn. Type: Maybe String
--
-- 2. database: Selects the database to be queried against. Example values are refseq_genomic, nr, est,.. Please consider that the database must be chosen in accordance with the blastprogram. Default value: refseq_genomic. Type: Maybe String
--
-- 3. querySequences: nucleotides or protein sequences, depending on the blast program used. If no sequence is provided an exception as String will be produced. Type: [Sequence]
--
-- 4. optionalArguments: This argument is optional and will filter the result if provided. Type: Maybe String
--
-- 5. optionalWalltime: Optional walltime in mircroseconds. If specified, will terminate the query after reaching the timelimit and return Left. Type: Maybe Int
--
-- and returns Either a BlastResult (Right) on success or an exception as String (Left)
--
-- If you plan to submit more than 20 searches in one session, please look up the Usage Guidelines in the webservice information <http://www.ncbi.nlm.nih.gov/BLAST/developer.shtml>.
module Bio.BlastHTTP ( BlastHTTPQuery (..),
                       blastHTTP,
                       blastTabularHTTP,
                     ) where

import Network.HTTP.Conduit
import qualified Data.ByteString.Lazy.Char8 as L8
import qualified Data.ByteString.Char8 as B
import qualified Control.Monad as CM
import Bio.BlastXML
import Text.XML.HXT.Core
import Network
import Data.List
import Control.Concurrent
import Data.Maybe
import Bio.Core.Sequence
import Bio.Sequence.Fasta
import Network.HTTP.Base
import Biobase.BLAST.Import
import Biobase.BLAST.Types
import qualified Data.Either.Unwrap as E
import Data.Int

data BlastHTTPQuery = BlastHTTPQuery
  { provider :: Maybe String
  , program :: Maybe String
  , database :: Maybe String
  , querySequences :: [Sequence]
  , optionalArguments :: Maybe String
  , optionalWalltime :: Maybe Int
  }
  deriving (Show, Eq)

-- | Parse HTML results into Xml Tree datastructure
parseHTML :: String -> IOStateArrow s0 b0 XmlTree
parseHTML = readString [withParseHTML yes, withWarnings no]

-- | Gets all subtrees with the specified id attribute
atId :: ArrowXml a =>  String -> a XmlTree XmlTree
atId elementId = deep (isElem >>> hasAttrValue "id" (== elementId))

-- | Send query and parse RID from retrieved HTML
startSession :: String -> String -> String -> String -> Maybe String -> IO String
startSession provider' program' database' querySequences' optionalArguments'
  | provider' == "ebi" = startSessionEBI program' database' querySequences' optionalArguments'
  | otherwise = startSessionNCBI program' database' querySequences' optionalArguments'

startSessionEBI :: String -> String -> String -> Maybe String -> IO String
startSessionEBI  program' database' querySequences' optionalArguments' = do
  requestXml <- withSocketsDo
      $ sendQueryEBI program' database' querySequences' optionalArguments'
  let requestID = L8.unpack requestXml
  return requestID

startSessionNCBI :: String -> String -> String -> Maybe String -> IO String
startSessionNCBI program' database' querySequences' optionalArguments' = do
  requestXml <- withSocketsDo
      $ sendQueryNCBI program' database' querySequences' optionalArguments'
  let requestXMLString = L8.unpack requestXml
  CM.liftM head (runX $ parseHTML requestXMLString //> atId "rid" >>> getAttrValue "value")

-- | Send query with or without optional arguments and return response HTML
sendQueryEBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryEBI program' database' querySequences' _ = do
  putStrLn "Making HTTP request"
  res <- do
    --initReq <- parseUrl "http://postcatcher.in/catchers/541811052cb53502000001a7"
    initReq <- parseUrlThrow "http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/run"
    let req = (flip urlEncodedBody) initReq $
             [ ("email", "florian.eggenhofer@univie.ac.at")
             , ("program", (B.pack program'))
             , ("database", (B.pack database'))
             , ("stype", "dna")
             , ("sequence", (B.pack querySequences'))
             ]
    newManager tlsManagerSettings >>= httpLbs req
        { method = "POST" }
  putStrLn "EBI Response"
  print res
  putStrLn "EBI Response Body"
  print (responseBody res)
  return (responseBody res)

-- | Send query with or without optional arguments and return response HTML
sendQueryNCBI :: String -> String -> String -> Maybe String -> IO L8.ByteString
sendQueryNCBI program' database' querySequences' optionalArguments'
  | isJust optionalArguments' = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ fromJust optionalArguments' ++ "&QUERY=" ++ querySequences')
  | otherwise = simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Put&PROGRAM=" ++ program' ++ "&DATABASE=" ++ database' ++ "&QUERY=" ++ querySequences')

-- | Retrieve session status with RID
retrieveSessionStatus :: String -> String -> IO String
retrieveSessionStatus provider' rid = do
  if provider' == "ebi"
     then do
       statusXml <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/status/" ++ rid)
       let statusXMLString = L8.unpack statusXml
       putStrLn "EBI statusXMLString"
       return statusXMLString
     else do
       statusXml <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?CMD=Get&FORMAT_OBJECT=SearchInfo&RID=" ++ rid)
       let statusXMLString = L8.unpack statusXml
       return statusXMLString

-- | Retrieve result in blastxml format with RID
retrieveResult :: String -> String -> IO (Either String BlastResult)
retrieveResult provider' rid = do
  if provider' == "ebi"
     then do
       statusXml <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/result/" ++ rid ++ "/xml")
       resultXML <- parseXML statusXml
       return (Right resultXML)
     else do
       resultResponse <- withSocketsDo $ simpleHttp ("http://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=XML&FORMAT_OBJECT=Alignment&CMD=Get")
       resultXML <- parseXML resultResponse
       let rightXML = Right resultXML
       return rightXML

-- | Retrieve result in blast tabular format with RID
retrieveTabularResult :: String -> String -> IO (Either String [BlastTabularResult])
retrieveTabularResult provider' rid = do
  if provider' == "ebi"
     then do
       resultResponse <- withSocketsDo $ simpleHttp ("http://www.ebi.ac.uk/Tools/services/rest/ncbiblast/result/" ++ rid ++ "/Tabular")
       let resultHeaderLessResponse = L8.drop (0  :: Int64) resultResponse
       let resultTabular = parseTabularHTTPBlasts resultHeaderLessResponse
       return (Right resultTabular)
     else do
       resultResponse <- withSocketsDo $ simpleHttp ("https://www.ncbi.nlm.nih.gov/blast/Blast.cgi?RESULTS_FILE=on&RID=" ++ rid ++ "&FORMAT_TYPE=Tabular&FORMAT_OBJECT=Alignment&CMD=Get")
       let resultHeaderLessResponse = L8.drop (60  :: Int64) resultResponse
       let resultTabular = parseTabularHTTPBlasts resultHeaderLessResponse
       return (Right resultTabular)


-- | Check if job results are ready and then retrieves results
--   If a walltime in microseconds was set query retrieval will termiate after it is consumed and return a Left result
checkSessionStatus :: String -> String -> Maybe Int -> Int -> IO (Either String String)
checkSessionStatus provider' rid walltime consumedTime = do
    threadDelay 120000000
    status <- retrieveSessionStatus provider' rid
    if (isNothing walltime)
       then do
         waitOrRetrieve provider' status rid walltime consumedTime
       else do
         if (consumedTime < (fromJust walltime))
           then do
             waitOrRetrieve provider' status rid walltime (consumedTime + 120000000)
           else do
             let exceptionMessage = "BLASTHTTP: Query did not return result within walltime"
             return (Left exceptionMessage)

waitOrRetrieve :: String -> String -> String -> Maybe Int -> Int -> IO (Either String String)
waitOrRetrieve provider' status rid walltime consumedTime
  | provider' == "ebi" = waitOrRetrieveEBI status rid walltime consumedTime
  | otherwise = waitOrRetrieveNCBI status rid walltime consumedTime

waitOrRetrieveEBI :: String -> String -> Maybe Int -> Int -> IO (Either String String)
waitOrRetrieveEBI status rid walltime consumedTime
  | "FINISHED" `isInfixOf` status = return (Right rid) -- retrieveResult ouputFormat "ebi" rid
  | "FAILURE" `isInfixOf` status = do
      let exceptionMessage = "BLASTHTTP: The EBI blast job failed."
      return (Left exceptionMessage)
  | "ERROR" `isInfixOf` status = do
      let exceptionMessage = "BLASTHTTP: An error occurred attempting to get the EBI blast job status."
      return (Left exceptionMessage)
  | "NOT_FOUND" `isInfixOf` status = do
      let exceptionMessage = "BLASTHTTP: The EBI blast job cannot be found."
      return (Left exceptionMessage)
-- RUNNING
  | otherwise = checkSessionStatus "ebi" rid walltime consumedTime

waitOrRetrieveNCBI :: String -> String -> Maybe Int -> Int -> IO (Either String String)
waitOrRetrieveNCBI status rid walltime consumedTime
  | "Status=READY" `isInfixOf` status = return (Right rid) -- retrieveResult "ncbi" rid
  | "Status=FAILURE" `isInfixOf` status = do
      let exceptionMessage = "Search $rid failed; please report to blast-help at ncbi.nlm.nih.gov.\n"
      return (Left exceptionMessage)
  | "Status=UNKNOWN" `isInfixOf` status = do
      let exceptionMessage = "Search $rid expired.\n"
      return (Left exceptionMessage)
  | "Status=WAITING" `isInfixOf` status = do
      checkSessionStatus "ncbi" rid walltime consumedTime
  --Unexpected status, return Left
  | otherwise = do
      let exceptionMessage = "Status has unexpected value " ++ status ++ " - aborting blast search\n"
      return (Left exceptionMessage)

-- | Sends Query and retrieves result on reaching READY status, will return exeption message if no query sequence has been provided
performQuery :: String -> String -> String -> [Sequence] -> Maybe String -> Maybe Int -> IO (Either String BlastResult)
performQuery provider' program' database' querySequences' optionalArgumentMaybe walltime
  | null querySequences' = do
      let exceptionMessage = "Error - no query sequence provided"
      return (Left exceptionMessage)
  | otherwise = do
     let sequenceString = urlEncode (concatMap showSequenceString querySequences')
     rid <- startSession provider' program' database' sequenceString optionalArgumentMaybe
     sessionStatus <- checkSessionStatus provider' rid walltime (0 :: Int)
     if E.isRight sessionStatus
        then retrieveResult provider' rid
        else return (Left (E.fromLeft sessionStatus))

showSequenceString :: Sequence -> String
showSequenceString fastaSequence = sequenceString
  where sequenceHeader = ">" ++ L8.unpack (unSL (seqheader fastaSequence)) ++ "\n"
        sequenceData = L8.unpack (unSD (seqdata fastaSequence)) ++ "\n"
        sequenceString = sequenceHeader ++ sequenceData

-- | Retrieve Blast results in BlastXML format from the NCBI REST Blast interface
-- The querySequence has to be provided, all other parameters are optional and can be set to Nothing
-- optionalArguments is attached to the query as is .e.g: "&ALIGNMENTS=250"
blastHTTP :: BlastHTTPQuery -> IO (Either String BlastResult)
blastHTTP (BlastHTTPQuery provider' program' database' querySequences' optionalArguments' walltime') = do
  let defaultProvider = "ncbi"
  let defaultProgram = "blastn"
  let defaultDatabase = "refseq_genomic"
  let defaultWalltime = Nothing
  let selectedProvider = fromMaybe defaultProvider provider'
  let selectedProgram = fromMaybe defaultProgram program'
  let selectedDatabase = fromMaybe defaultDatabase database'
  let selectedWalltime = maybe defaultWalltime Just walltime'
  --walltime of 1h in microseconds
  --let walltime = Just (7200000000 ::Int)
  performQuery selectedProvider selectedProgram selectedDatabase querySequences' optionalArguments' selectedWalltime

-- | Retrieve Blast results in Blast tabular format from the NCBI REST Blast interface
-- The querySequence has to be provided, all other parameters are optional and can be set to Nothing
-- optionalArguments is attached to the query as is .e.g: "&ALIGNMENTS=250"
blastTabularHTTP :: BlastHTTPQuery -> IO (Either String [BlastTabularResult])
blastTabularHTTP (BlastHTTPQuery provider' program' database' querySequences' optionalArguments' walltime') = do
  let defaultProvider = "ncbi"
  let defaultProgram = "blastn"
  let defaultDatabase = "refseq_genomic"
  let defaultWalltime = Nothing
  let selectedProvider = fromMaybe defaultProvider provider'
  let selectedProgram = fromMaybe defaultProgram program'
  let selectedDatabase = fromMaybe defaultDatabase database'
  let selectedWalltime = maybe defaultWalltime Just walltime'
  --walltime of 1h in microseconds
  --let walltime = Just (7200000000 ::Int)
  performTabularQuery selectedProvider selectedProgram selectedDatabase querySequences' optionalArguments' selectedWalltime

-- | Sends Query and retrieves result on reaching READY status, will return exeption message if no query sequence has been provided
performTabularQuery :: String -> String -> String -> [Sequence] -> Maybe String -> Maybe Int -> IO (Either String [BlastTabularResult])
performTabularQuery provider' program' database' querySequences' optionalArgumentMaybe walltime
  | null querySequences' = do
      let exceptionMessage = "Error - no query sequence provided"
      return (Left exceptionMessage)
  | otherwise = do
     let sequenceString = urlEncode (concatMap showSequenceString querySequences')
     rid <- startSession provider' program' database' sequenceString (Just (maybe "&FORMAT_TYPE=TABULAR" ("&FORMAT_TYPE=TABULAR" ++) optionalArgumentMaybe))
     sessionStatus <- checkSessionStatus provider' rid walltime (0 :: Int)
     if E.isRight sessionStatus
        then retrieveTabularResult provider' rid
        else return (Left (E.fromLeft sessionStatus))