-- | This module contains functions for RNAlien

module Bio.RNAlienLibrary (
                           module Bio.RNAlienData,
                           createSessionID,
                           logMessage,
                           logEither,
                           modelConstructer,
                           constructTaxonomyRecordsCSVTable,
                           resultSummary,
                           setVerbose,
                           logToolVersions,
                           checkTools,
                           systemCMsearch,
                           readCMSearch,
                           readCMSearches,
                           compareCM,
                           parseCMSearch,
                           cmSearchsubString,
                           setInitialTaxId,
                           evaluateConstructionResult,
                           readCMstat,
                           parseCMstat,
                           checkNCBIConnection,
                           preprocessClustalForRNAz,
                           preprocessClustalForRNAzExternal,
                           rnaZEvalOutput,
                           reformatFasta,
                           checkTaxonomyRestriction,
                           evaluePartitionTrimCMsearchHits
                           )
where
   
import System.Process 
import qualified System.FilePath as FP
import Text.ParserCombinators.Parsec 
import Data.List
import Data.Char
import Bio.Core.Sequence 
import Bio.Sequence.Fasta 
import Bio.BlastXML
import Bio.ClustalParser
import Data.Int (Int16)
import Bio.RNAlienData
import qualified Data.ByteString.Lazy.Char8 as L
import Bio.Taxonomy 
import Data.Either.Unwrap
import Data.Maybe
import Bio.EntrezHTTP 
import System.Exit
import Data.Either (lefts,rights,Either)
import qualified Text.EditDistance as ED   
import qualified Data.Vector as V
import Control.Concurrent 
import System.Random
import Data.Csv
import Data.Matrix
import Bio.BlastHTTP 
import Data.Clustering.Hierarchical
import System.Directory
import System.Console.CmdArgs
import qualified Control.Exception.Base as CE
import Bio.RNAfoldParser
import Bio.RNAalifoldParser
import Bio.RNAzParser
import Network.HTTP
import qualified Bio.RNAcodeParser as RC
import Bio.RNAcentralHTTP
import Bio.InfernalParser
import qualified Data.Text as T
import qualified Data.Text.IO as TI
import qualified Data.Text.Encoding as DTE
import qualified Data.Text.Lazy.Encoding as E
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.IO as TIO

-- | Initial RNA family model construction - generates iteration number, seed alignment and model
modelConstructer :: StaticOptions -> ModelConstruction -> IO ModelConstruction
modelConstructer staticOptions modelConstruction = do
  logMessage ("Iteration: " ++ show (iterationNumber modelConstruction) ++ "\n") (tempDirPath staticOptions)
  iterationSummary modelConstruction staticOptions
  let currentIterationNumber = (iterationNumber modelConstruction)
  let foundSequenceNumber = length (concatMap sequenceRecords (taxRecords modelConstruction))
  --extract queries
  let queries = extractQueries foundSequenceNumber modelConstruction
  logVerboseMessage (verbositySwitch staticOptions) ("Queries:" ++ show queries ++ "\n") (tempDirPath staticOptions)
  let iterationDirectory = (tempDirPath staticOptions) ++ (show currentIterationNumber) ++ "/"
  let maybeLastTaxId = extractLastTaxId (taxonomicContext modelConstruction)
  if (isNothing maybeLastTaxId) then logMessage ("Lineage: Could not extract last tax id \n") (tempDirPath staticOptions) else (return ())
  --If highest node in linage was used as upper taxonomy limit, taxonomic tree is exhausted
  if (maybe True (\uppertaxlimit -> maybe True (\lastTaxId -> uppertaxlimit /= lastTaxId) maybeLastTaxId) (upperTaxonomyLimit modelConstruction))
     then do
       createDirectory (iterationDirectory) 
       let (upperTaxLimit,lowerTaxLimit) = setTaxonomicContextEntrez currentIterationNumber (taxonomicContext modelConstruction) (upperTaxonomyLimit modelConstruction)
       logVerboseMessage (verbositySwitch staticOptions) ("Upper taxonomy limit: " ++ (show upperTaxLimit) ++ "\n " ++ "Lower taxonomy limit: "++ show lowerTaxLimit ++ "\n") (tempDirPath staticOptions)
       --search queries
       let expectThreshold = setBlastExpectThreshold modelConstruction
       searchResults <- catchAll (searchCandidates staticOptions Nothing currentIterationNumber upperTaxLimit lowerTaxLimit expectThreshold queries) 
                        (\e -> do logWarning ("Warning: Search results iteration" ++ show (iterationNumber modelConstruction) ++ " - exception: " ++ show e) (tempDirPath staticOptions)
                                  return (SearchResult [] Nothing))
       currentTaxonomicContext <- getTaxonomicContextEntrez upperTaxLimit (taxonomicContext modelConstruction)
       if null (candidates searchResults)
         then do
            alignmentConstructionWithoutCandidates currentTaxonomicContext upperTaxLimit staticOptions modelConstruction
         else do            
            alignmentConstructionWithCandidates currentTaxonomicContext upperTaxLimit searchResults staticOptions modelConstruction
     else do
       logMessage ("Message: Modelconstruction complete: Out of queries or taxonomic tree exhausted\n") (tempDirPath staticOptions)
       modelConstructionResult staticOptions modelConstruction

catchAll :: IO a -> (CE.SomeException -> IO a) -> IO a
catchAll = CE.catch

setInitialTaxId :: Maybe String -> String -> Maybe Int -> Sequence -> IO (Maybe Int)
setInitialTaxId inputBlastDatabase tempdir inputTaxId inputSequence = do
  if (isNothing inputTaxId)
    then do
      initialTaxId <- findTaxonomyStart inputBlastDatabase tempdir inputSequence
      return (Just initialTaxId)
    else do 
        return inputTaxId

extractLastTaxId :: Maybe Taxon -> Maybe Int
extractLastTaxId taxon 
  | isNothing taxon = Nothing
  | V.null lineageExVector = Nothing
  | otherwise = Just (lineageTaxId (V.head lineageExVector))
    where lineageExVector = V.fromList (lineageEx (fromJust taxon))

modelConstructionResult :: StaticOptions -> ModelConstruction -> IO ModelConstruction
modelConstructionResult staticOptions modelConstruction = do
  let currentIterationNumber = iterationNumber modelConstruction
  let outputDirectory = tempDirPath staticOptions
  logMessage ("Global search iteration: " ++ show currentIterationNumber ++ "\n") outputDirectory
  iterationSummary modelConstruction staticOptions
  let foundSequenceNumber = length (concatMap sequenceRecords (taxRecords modelConstruction))
  --extract queries
  --let querySeqIds = selectedQueries modelConstruction ---
  let queries = extractQueries foundSequenceNumber modelConstruction ---
  --let alignedSequences' = map nucleotideSequence (concatMap sequenceRecords (taxRecords modelConstruction)) ---
  logVerboseMessage (verbositySwitch staticOptions) ("Queries:" ++ show queries ++ "\n") outputDirectory
  let iterationDirectory = outputDirectory ++ (show currentIterationNumber) ++ "/"
  createDirectory (iterationDirectory)
  let logFileDirectoryPath = iterationDirectory ++ "log"
  createDirectoryIfMissing False logFileDirectoryPath
  let expectThreshold = setBlastExpectThreshold modelConstruction
  (alignmentResults,currentPotentialMembers) <- if isJust (taxRestriction staticOptions)
    then do
      --taxonomic restriction
      let (upperTaxLimit,lowerTaxLimit) = setRestrictedTaxonomyLimits (fromJust (taxRestriction staticOptions))
      restrictedCandidates <- catchAll (searchCandidates staticOptions (taxRestriction staticOptions) currentIterationNumber upperTaxLimit lowerTaxLimit expectThreshold queries)
                     (\e -> do logWarning ("Warning: Search results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                               return (SearchResult [] Nothing))
      let uniqueCandidates = filterDuplicates modelConstruction restrictedCandidates
      (restrictedAlignmentResults,restrictedPotentialMembers) <- catchAll (alignCandidates staticOptions modelConstruction (fromJust (taxRestriction staticOptions)) uniqueCandidates)
                           (\e -> do logWarning ("Warning: Alignment results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                                     return  ([],[]))
      let currentPotentialMembers = [SearchResult restrictedPotentialMembers (blastDatabaseSize restrictedCandidates)]
      return (restrictedAlignmentResults,currentPotentialMembers)
    else do
      --taxonomic context archea
      let (upperTaxLimit1,lowerTaxLimit1) = (Just (2157 :: Int), Nothing)
      candidates1 <- catchAll  (searchCandidates staticOptions (Just "archea") currentIterationNumber upperTaxLimit1 lowerTaxLimit1 expectThreshold queries)
                     (\e -> do logWarning ("Warning: Search results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                               return (SearchResult [] Nothing))
      let uniqueCandidates1 = filterDuplicates modelConstruction candidates1 
      (alignmentResults1,potentialMembers1) <- catchAll (alignCandidates staticOptions modelConstruction "archea" uniqueCandidates1)
                           (\e -> do logWarning ("Warning: Alignment results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                                     return  ([],[]))
      --taxonomic context bacteria
      let (upperTaxLimit2,lowerTaxLimit2) = (Just (2 :: Int), Nothing)
      candidates2 <- catchAll (searchCandidates staticOptions (Just "bacteria") currentIterationNumber upperTaxLimit2 lowerTaxLimit2 expectThreshold queries)
                     (\e -> do logWarning ("Warning: Search results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                               return (SearchResult [] Nothing))
      let uniqueCandidates2 = filterDuplicates modelConstruction candidates2
      (alignmentResults2,potentialMembers2) <- catchAll (alignCandidates staticOptions modelConstruction "bacteria" uniqueCandidates2)
                           (\e -> do logWarning ("Warning: Alignment results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                                     return  ([],[]))
      --taxonomic context eukaryia
      let (upperTaxLimit3,lowerTaxLimit3) = (Just (2759 :: Int), Nothing)
      candidates3 <- catchAll (searchCandidates staticOptions (Just "eukaryia") currentIterationNumber upperTaxLimit3 lowerTaxLimit3 expectThreshold queries)
                     (\e -> do logWarning ("Warning: Search results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                               return (SearchResult [] Nothing))
      let uniqueCandidates3 = filterDuplicates modelConstruction candidates3
      (alignmentResults3,potentialMembers3) <- catchAll (alignCandidates staticOptions modelConstruction "eukaryia" uniqueCandidates3)
                           (\e -> do logWarning ("Warning: Alignment results iteration" ++ show currentIterationNumber ++ " - exception: " ++ show e) outputDirectory
                                     return  ([],[]))
      let alignmentResults = alignmentResults1 ++ alignmentResults2 ++ alignmentResults3
      let currentPotentialMembers = [SearchResult potentialMembers1 (blastDatabaseSize candidates1), SearchResult potentialMembers2 (blastDatabaseSize candidates2), SearchResult potentialMembers3 (blastDatabaseSize candidates3)]
      return (alignmentResults,currentPotentialMembers)
  let preliminaryFastaPath = iterationDirectory ++ "model.fa"
  let preliminaryCMPath = iterationDirectory ++ "model.cm"
  let preliminaryAlignmentPath = iterationDirectory ++ "model.stockholm"
  let preliminaryCMLogPath = iterationDirectory ++ "model.cm.log"
  let nextModelConstructionInput = constructNext currentIterationNumber modelConstruction alignmentResults Nothing Nothing [] currentPotentialMembers (alignmentModeInfernal modelConstruction)
  if (length alignmentResults == 0) && (not (alignmentModeInfernal modelConstruction))
    then do
      logVerboseMessage (verbositySwitch staticOptions) ("Alignment result initial mode\n") outputDirectory
      logMessage ("Message: No sequences found that statisfy filters. Try to reconstruct model with less strict cutoff parameters.") outputDirectory
      let alignedSequences = extractAlignedSequences (iterationNumber modelConstruction) modelConstruction
      let alignmentSequences = map snd (V.toList (V.concat [alignedSequences]))
      writeFasta preliminaryFastaPath alignmentSequences
      let cmBuildFilepath = iterationDirectory ++ "model" ++ ".cmbuild"
      let refinedAlignmentFilepath = iterationDirectory ++ "modelrefined" ++ ".stockholm"
      let cmBuildOptions ="--refine " ++ refinedAlignmentFilepath
      let foldFilepath = iterationDirectory ++ "model" ++ ".fold"
      _ <- systemRNAfold preliminaryFastaPath foldFilepath
      foldoutput <- readRNAfold foldFilepath
      let seqStructure = foldSecondaryStructure (fromRight foldoutput)
      let stockholAlignment = convertFastaFoldStockholm (head alignmentSequences) seqStructure
      writeFile preliminaryAlignmentPath stockholAlignment
      _ <- systemCMbuild cmBuildOptions preliminaryAlignmentPath preliminaryCMPath cmBuildFilepath
      _ <- systemCMcalibrate "fast" (cpuThreads staticOptions) preliminaryCMPath preliminaryCMLogPath
      resultModelConstruction <- reevaluatePotentialMembers staticOptions nextModelConstructionInput
      return resultModelConstruction
    else do     
      if (alignmentModeInfernal modelConstruction)
        then do
          logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction with candidates - infernal mode\n") outputDirectory
          constructModel nextModelConstructionInput staticOptions
          writeFile (iterationDirectory ++ "done") ""
          logMessage (iterationSummaryLog nextModelConstructionInput) outputDirectory
          logVerboseMessage (verbositySwitch staticOptions) (show nextModelConstructionInput) outputDirectory
          resultModelConstruction <- reevaluatePotentialMembers staticOptions nextModelConstructionInput
          return resultModelConstruction
        else do
          logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction with candidates - initial mode\n") outputDirectory
          constructModel nextModelConstructionInput staticOptions
          let nextModelConstructionInputInfernalMode = nextModelConstructionInput {alignmentModeInfernal = True}
          logMessage (iterationSummaryLog nextModelConstructionInputInfernalMode) outputDirectory
          logVerboseMessage (verbositySwitch staticOptions) (show nextModelConstructionInputInfernalMode) outputDirectory
          writeFile (iterationDirectory ++ "done") ""
          resultModelConstruction <- reevaluatePotentialMembers staticOptions nextModelConstructionInputInfernalMode
          return resultModelConstruction

-- | Reevaluate collected potential members for inclusion in the result model
reevaluatePotentialMembers :: StaticOptions -> ModelConstruction -> IO ModelConstruction
reevaluatePotentialMembers staticOptions modelConstruction = do
  let currentIterationNumber = iterationNumber modelConstruction
  let outputDirectory = tempDirPath staticOptions
  iterationSummary modelConstruction staticOptions
  logMessage ("Reevaluation of potential members iteration: " ++ show currentIterationNumber ++ "\n") outputDirectory
  let iterationDirectory = outputDirectory ++ (show currentIterationNumber) ++ "/"
  createDirectory (iterationDirectory)
  let indexedPotentialMembers = V.indexed (V.fromList (potentialMembers modelConstruction))
  potentialMembersAlignmentResultVector <- V.mapM (\(i,searchresult) -> (alignCandidates staticOptions modelConstruction (show i ++ "_") searchresult)) indexedPotentialMembers
  let potentialMembersAlignmentResults = V.toList potentialMembersAlignmentResultVector
  let alignmentResults = concatMap fst potentialMembersAlignmentResults
  let discardedMembers = concatMap snd potentialMembersAlignmentResults
  writeFile (outputDirectory  ++ "log/discarded") (concatMap show discardedMembers)
  let resultFastaPath = outputDirectory  ++ "result.fa"
  let resultCMPath = outputDirectory ++ "result.cm"
  let resultAlignmentPath = outputDirectory ++ "result.stockholm"
  let resultCMLogPath = outputDirectory ++ "log/result.cm.log"
  if null alignmentResults
    then do
      let lastIterationFastaPath = outputDirectory ++ show (currentIterationNumber - 1)++ "/model.fa"
      let lastIterationAlignmentPath = outputDirectory ++ show (currentIterationNumber - 1)  ++ "/model.stockholm"
      let lastIterationCMPath = outputDirectory ++ show (currentIterationNumber - 1)++ "/model.cm"
      copyFile lastIterationCMPath resultCMPath
      --copyFile lastIterationCMPath (resultCMPath ++ ".bak1")
      copyFile lastIterationFastaPath resultFastaPath
      copyFile lastIterationAlignmentPath resultAlignmentPath
      _ <- systemCMcalibrate "standard" (cpuThreads staticOptions) resultCMPath resultCMLogPath
      writeFile (iterationDirectory ++ "done") ""
      return modelConstruction
    else do
      let lastIterationFastaPath = outputDirectory ++ show currentIterationNumber ++ "/model.fa"
      let lastIterationAlignmentPath = outputDirectory ++ show currentIterationNumber  ++ "/model.stockholm"
      let lastIterationCMPath = outputDirectory ++ show currentIterationNumber ++ "/model.cm"
      logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction with candidates - infernal mode\n") outputDirectory
      let nextModelConstructionInput = constructNext currentIterationNumber modelConstruction alignmentResults Nothing Nothing [] [] (alignmentModeInfernal modelConstruction)
      constructModel nextModelConstructionInput staticOptions
      copyFile lastIterationCMPath resultCMPath
      --debug
      --copyFile lastIterationCMPath (resultCMPath ++ ".bak2")
      copyFile lastIterationFastaPath resultFastaPath
      copyFile lastIterationAlignmentPath resultAlignmentPath
      logMessage (iterationSummaryLog nextModelConstructionInput) outputDirectory
      logVerboseMessage (verbositySwitch staticOptions) (show nextModelConstructionInput) outputDirectory
      _ <- systemCMcalibrate "standard" (cpuThreads staticOptions) resultCMPath resultCMLogPath
      writeFile (iterationDirectory ++ "done") ""
      return nextModelConstructionInput
  
---------------------------------------------------------
                  
alignmentConstructionWithCandidates :: Maybe Taxon -> Maybe Int -> SearchResult -> StaticOptions -> ModelConstruction -> IO ModelConstruction
alignmentConstructionWithCandidates currentTaxonomicContext currentUpperTaxonomyLimit searchResults staticOptions modelConstruction = do
    --candidates usedUpperTaxonomyLimit blastDatabaseSize 
    let currentIterationNumber = (iterationNumber modelConstruction)
    let iterationDirectory = (tempDirPath staticOptions) ++ (show currentIterationNumber) ++ "/"                             
    --let usedUpperTaxonomyLimit = (snd (head candidates))                               
    --align search result
    (alignmentResults,potentialMemberEntries) <- catchAll (alignCandidates staticOptions modelConstruction "" searchResults)
                        (\e -> do logWarning ("Warning: Alignment results iteration" ++ show (iterationNumber modelConstruction) ++ " - exception: " ++ show e) (tempDirPath staticOptions)
                                  return ([],[]))
    let currentPotentialMembers = [SearchResult potentialMemberEntries (blastDatabaseSize searchResults)]
    if (length alignmentResults == 0) && (not (alignmentModeInfernal modelConstruction))
      then do
        logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction with candidates - length 1 - inital mode" ++ "\n") (tempDirPath staticOptions)
        --too few sequences for alignment. because of lack in sequences no cm was constructed before
        --reusing previous modelconstruction with increased upperTaxonomyLimit but include found sequence
        --prepare next iteration
        let newTaxEntries = (taxRecords modelConstruction) ++ (buildTaxRecords alignmentResults currentIterationNumber)
        let nextModelConstructionInputWithThreshold = modelConstruction {iterationNumber = (currentIterationNumber + 1),upperTaxonomyLimit = currentUpperTaxonomyLimit, taxRecords = newTaxEntries,taxonomicContext = currentTaxonomicContext}
        logMessage (iterationSummaryLog nextModelConstructionInputWithThreshold) (tempDirPath staticOptions)
        logVerboseMessage (verbositySwitch staticOptions)  (show nextModelConstructionInputWithThreshold) (tempDirPath staticOptions)     ----      
        writeFile (iterationDirectory ++ "done") ""
        nextModelConstruction <- modelConstructer staticOptions nextModelConstructionInputWithThreshold           
        return nextModelConstruction 
      else do
        if (alignmentModeInfernal modelConstruction)
          then do
            logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction with candidates - infernal mode\n") (tempDirPath staticOptions)
            --prepare next iteration
            let nextModelConstructionInput = constructNext currentIterationNumber modelConstruction alignmentResults currentUpperTaxonomyLimit currentTaxonomicContext [] currentPotentialMembers True        
            constructModel nextModelConstructionInput staticOptions               
            writeFile (iterationDirectory ++ "done") ""
            logMessage (iterationSummaryLog nextModelConstructionInput) (tempDirPath staticOptions)
            logVerboseMessage (verbositySwitch staticOptions)  (show nextModelConstructionInput) (tempDirPath staticOptions)
            --select queries
            currentSelectedQueries <- selectQueries staticOptions modelConstruction alignmentResults
            let nextModelConstructionInputWithQueries = nextModelConstructionInput {selectedQueries = currentSelectedQueries}
            nextModelConstruction <- modelConstructer staticOptions nextModelConstructionInputWithQueries
            return nextModelConstruction
          else do
            logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction with candidates - initial mode\n") (tempDirPath staticOptions)
            --First round enough candidates are available for modelconstruction, alignmentModeInfernal is set to true after this iteration
            --prepare next iteration
            let nextModelConstructionInput = constructNext currentIterationNumber modelConstruction alignmentResults currentUpperTaxonomyLimit currentTaxonomicContext [] currentPotentialMembers False       
            constructModel nextModelConstructionInput staticOptions
            currentSelectedQueries <- selectQueries staticOptions modelConstruction alignmentResults
            --select queries
            let nextModelConstructionInputWithInfernalMode = nextModelConstructionInput {alignmentModeInfernal = True, selectedQueries = currentSelectedQueries}
            logMessage (iterationSummaryLog  nextModelConstructionInputWithInfernalMode) (tempDirPath staticOptions)
            logVerboseMessage (verbositySwitch staticOptions)  (show  nextModelConstructionInputWithInfernalMode) (tempDirPath staticOptions)
            writeFile (iterationDirectory ++ "done") ""
            nextModelConstruction <- modelConstructer staticOptions nextModelConstructionInputWithInfernalMode        
            return nextModelConstruction
               
alignmentConstructionWithoutCandidates :: Maybe Taxon -> Maybe Int ->  StaticOptions -> ModelConstruction -> IO ModelConstruction
alignmentConstructionWithoutCandidates currentTaxonomicContext upperTaxLimit staticOptions modelConstruction = do
    let currentIterationNumber = (iterationNumber modelConstruction)
    let iterationDirectory = (tempDirPath staticOptions) ++ (show currentIterationNumber) ++ "/"   
    --Found no new candidates in this iteration, reusing previous modelconstruction with increased upperTaxonomyLimit
    let nextModelConstructionInputWithThreshold = modelConstruction  {iterationNumber = (currentIterationNumber + 1),upperTaxonomyLimit = upperTaxLimit,taxonomicContext = currentTaxonomicContext}
    --copy model and alignment from last iteration in place if present
    let previousIterationCMPath = (tempDirPath staticOptions) ++ (show (currentIterationNumber - 1)) ++ "/model.cm"
    previousIterationCMexists <- doesFileExist previousIterationCMPath
    if previousIterationCMexists
      then do
        logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction no candidates - previous cm\n") (tempDirPath staticOptions)
        let previousIterationFastaPath = (tempDirPath staticOptions) ++ (show (currentIterationNumber - 1)) ++ "/model.fa"
        let previousIterationAlignmentPath = (tempDirPath staticOptions) ++ (show (currentIterationNumber - 1)) ++ "/model.stockholm"
        let thisIterationFastaPath = (tempDirPath staticOptions) ++ (show (currentIterationNumber)) ++ "/model.fa"
        let thisIterationAlignmentPath = (tempDirPath staticOptions) ++ (show (currentIterationNumber)) ++ "/model.stockholm"
        let thisIterationCMPath = (tempDirPath staticOptions) ++ (show (currentIterationNumber)) ++ "/model.cm"
        copyFile previousIterationFastaPath thisIterationFastaPath
        copyFile previousIterationAlignmentPath thisIterationAlignmentPath
        copyFile previousIterationCMPath thisIterationCMPath
        logMessage (iterationSummaryLog nextModelConstructionInputWithThreshold) (tempDirPath staticOptions)
        logVerboseMessage (verbositySwitch staticOptions) (show nextModelConstructionInputWithThreshold) (tempDirPath staticOptions)
        writeFile (iterationDirectory ++ "done") ""
        nextModelConstruction <- modelConstructer staticOptions nextModelConstructionInputWithThreshold           
        return nextModelConstruction
      else do
        logVerboseMessage (verbositySwitch staticOptions) ("Alignment construction no candidates - no previous iteration cm\n") (tempDirPath staticOptions)
        logMessage (iterationSummaryLog nextModelConstructionInputWithThreshold) (tempDirPath staticOptions)
        logVerboseMessage (verbositySwitch staticOptions) (show nextModelConstructionInputWithThreshold) (tempDirPath staticOptions)    ----       
        writeFile (iterationDirectory ++ "done") ""
        nextModelConstruction <- modelConstructer staticOptions nextModelConstructionInputWithThreshold           
        return nextModelConstruction
           
findTaxonomyStart :: Maybe String -> String -> Sequence -> IO Int
findTaxonomyStart inputBlastDatabase temporaryDirectory querySequence = do
  let queryIndexString = "1"
  let hitNumberQuery = buildHitNumberQuery "&HITLIST_SIZE=10" 
  let registrationInfo = buildRegistration "RNAlien" "florian.eggenhofer@univie.ac.at"
  let blastQuery = BlastHTTPQuery (Just "ncbi") (Just "blastn") inputBlastDatabase [querySequence] (Just (hitNumberQuery ++ registrationInfo)) (Just (5400000000 :: Int))
  logMessage ("No tax id provided - Sending find taxonomy start blast query \n") temporaryDirectory
  blastOutput <- CE.catch (blastHTTP blastQuery)
                       (\e -> do let err = show (e :: CE.IOException)
                                 logWarning ("Warning: Blast attempt failed:" ++ " " ++ err) temporaryDirectory
                                 error "findTaxonomyStart: Blast attempt failed"
                                 return (Left ""))
  let logFileDirectoryPath =  temporaryDirectory ++ "taxonomystart" ++ "/" 
  createDirectory logFileDirectoryPath
  writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString  ++ "_1blastOutput") (show blastOutput)
  logEither blastOutput temporaryDirectory 
  let blastHitsArePresent = either (\_ -> False) blastMatchesPresent blastOutput
  if (blastHitsArePresent)
     then do
       let rightBlast = fromRight blastOutput
       let bestHit = getBestHit rightBlast
       bestBlastHitTaxIdOutput <- retrieveBlastHitTaxIdEntrez [bestHit]
       let taxIdFromEntrySummaries = extractTaxIdFromEntrySummaries (snd bestBlastHitTaxIdOutput)
       if (null taxIdFromEntrySummaries) then (error "findTaxonomyStart: - head: empty list of taxonomy entry summary for best hit")  else return ()
       let rightBestTaxIdResult = head taxIdFromEntrySummaries
       logMessage ("Initial TaxId: " ++ (show rightBestTaxIdResult) ++ "\n") temporaryDirectory
       CE.evaluate rightBestTaxIdResult
     else error "Find taxonomy start: Could not find blast hits to use as a taxonomic starting point"

searchCandidates :: StaticOptions -> Maybe String -> Int ->  Maybe Int -> Maybe Int -> Double -> [Sequence] -> IO SearchResult
searchCandidates staticOptions finaliterationprefix iterationnumber upperTaxLimit lowerTaxLimit expectThreshold inputQuerySequences = do
  --let fastaSeqData = seqdata _querySequence
  if (null inputQuerySequences) then error "searchCandidates: - head: empty list of query sequences" else return ()
  let queryLength = fromIntegral (seqlength (head inputQuerySequences))
  let queryIndexString = "1"
  let entrezTaxFilter = buildTaxFilterQuery upperTaxLimit lowerTaxLimit 
  logVerboseMessage (verbositySwitch staticOptions) ("entrezTaxFilter" ++ show entrezTaxFilter ++ "\n") (tempDirPath staticOptions)
  let hitNumberQuery = buildHitNumberQuery "&HITLIST_SIZE=5000&EXPECT=" ++ show expectThreshold
  let registrationInfo = buildRegistration "RNAlien" "florian.eggenhofer@univie.ac.at"
  let softmaskFilter = if (blastSoftmaskingToggle staticOptions) then "&FILTER=True&FILTER=m" else ""
  let blastQuery = BlastHTTPQuery (Just "ncbi") (Just "blastn") (blastDatabase staticOptions) inputQuerySequences  (Just (hitNumberQuery ++ entrezTaxFilter ++ softmaskFilter ++ registrationInfo)) (Just (5400000000 :: Int))
  --appendFile "/scratch/egg/blasttest/queries" ("\nBlast query:\n"  ++ show blastQuery ++ "\n") 
  logVerboseMessage (verbositySwitch staticOptions) ("Sending blast query " ++ (show iterationnumber) ++ "\n") (tempDirPath staticOptions)
  blastOutput <- CE.catch (blastHTTP blastQuery)
                       (\e -> do let err = show (e :: CE.IOException)
                                 logWarning ("Warning: Blast attempt failed:" ++ " " ++ err) (tempDirPath staticOptions)
                                 return (Left ""))
  let logFileDirectoryPath = (tempDirPath staticOptions) ++ (show iterationnumber) ++ "/" ++ (fromMaybe "" finaliterationprefix) ++ "log"
  logDirectoryPresent <- doesDirectoryExist logFileDirectoryPath                      
  if (not logDirectoryPresent)
    then createDirectory (logFileDirectoryPath) else return ()
  writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString  ++ "_1blastOutput") (show blastOutput)
  logEither blastOutput (tempDirPath staticOptions) 
  let blastHitsArePresent = either (const False) blastMatchesPresent blastOutput
  if blastHitsArePresent
     then do
       let rightBlast = fromRight blastOutput
      -- bestBlastHitTaxIdOutput <- retrieveBlastHitTaxIdEntrez [bestHit]
      -- let taxIdFromEntrySummaries = extractTaxIdFromEntrySummaries bestBlastHitTaxIdOutput
      -- if (null taxIdFromEntrySummaries) then (error "searchCandidates: - head: empty list of taxonomy entry summary for best hit")  else return ()
      -- let rightBestTaxIdResult = head taxIdFromEntrySummaries
      -- logVerboseMessage (verbositySwitch staticOptions) ("rightbestTaxIdResult: " ++ (show rightBestTaxIdResult) ++ "\n") (tempDirPath staticOptions)
       let blastHits = concatMap hits (results rightBlast)
       writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString  ++ "_2blastHits") (showlines blastHits)
       --filter by length
       let blastHitsFilteredByLength = filterByHitLength blastHits queryLength (lengthFilterToggle staticOptions)
       writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString  ++ "_3blastHitsFilteredByLength") (showlines blastHitsFilteredByLength)
       let blastHitsFilteredByCoverage = filterByCoverage blastHitsFilteredByLength queryLength (coverageFilterToggle staticOptions)
       writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString  ++ "_3ablastHitsFilteredByLength") (showlines blastHitsFilteredByCoverage)
       --tag BlastHits with TaxId
       blastHitsWithTaxIdOutput <- retrieveBlastHitsTaxIdEntrez blastHitsFilteredByCoverage
       let uncheckedBlastHitsWithTaxIdList = map (\(blasthits,taxIdout) -> (blasthits,extractTaxIdFromEntrySummaries taxIdout)) blastHitsWithTaxIdOutput
       let checkedBlastHitsWithTaxId = filter (\(_,taxids) -> not (null taxids)) uncheckedBlastHitsWithTaxIdList
       --todo checked blasthittaxidswithblasthits need to be merged as taxid blasthit pairs
       let blastHitsWithTaxId = zip (concatMap fst checkedBlastHitsWithTaxId) (concatMap snd checkedBlastHitsWithTaxId)
       blastHitsWithParentTaxIdOutput <- retrieveParentTaxIdsEntrez blastHitsWithTaxId
       --let blastHitsWithParentTaxId = concat blastHitsWithParentTaxIdOutput
       -- filter by ParentTaxId (only one hit per TaxId)
       let blastHitsFilteredByParentTaxIdWithParentTaxId = filterByParentTaxId blastHitsWithParentTaxIdOutput True
       --let blastHitsFilteredByParentTaxId = map fst blastHitsFilteredByParentTaxIdWithParentTaxId
       writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString ++ "_4blastHitsFilteredByParentTaxId") (showlines blastHitsFilteredByParentTaxIdWithParentTaxId)
       -- Filtering with TaxTree (only hits from the same subtree as besthit)
       --let blastHitsWithTaxId = zip blastHitsFilteredByParentTaxId blastHittaxIdList
       --let (_, filteredBlastResults) = filterByNeighborhoodTreeConditional alignndmentModeInfernalToggle upperTaxLimit blastHitsWithTaxId (inputTaxNodes staticOptions) (fromJust upperTaxLimit) (singleHitperTaxToggle staticOptions)
       --writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString ++ "_5filteredBlastResults") (showlines filteredBlastResults)
       -- Coordinate generation
       let nonEmptyfilteredBlastResults = filter (\(blasthit,_) -> not (null (matches blasthit))) blastHitsFilteredByParentTaxIdWithParentTaxId
       let requestedSequenceElements = map (getRequestedSequenceElement queryLength) nonEmptyfilteredBlastResults
       writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString ++  "_6requestedSequenceElements") (showlines requestedSequenceElements)
       -- Retrieval of full sequences from entrez
       --fullSequencesWithSimilars <- retrieveFullSequences requestedSequenceElements
       fullSequencesWithSimilars <- retrieveFullSequences staticOptions requestedSequenceElements
       if null fullSequencesWithSimilars
         then do
           writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString ++ "_10afullSequencesWithSimilars") "No sequences retrieved"
           CE.evaluate (SearchResult [] Nothing)
         else do
           writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString ++ "_10afullSequencesWithSimilars") (showlines fullSequencesWithSimilars)
           let fullSequences = filterIdenticalSequences fullSequencesWithSimilars 100
           --let fullSequencesWithOrigin = map (\(parsedFasta,taxid,seqSubject) -> (parsedFasta,taxid,seqSubject,'B')) fullSequences
           writeFile (logFileDirectoryPath ++ "/" ++ queryIndexString ++ "_10fullSequences") (showlines fullSequences)
           let maybeFractionEvalueMatch = getHitWithFractionEvalue rightBlast
           if isNothing maybeFractionEvalueMatch
             then CE.evaluate (SearchResult [] Nothing) 
             else do
               let fractionEvalueMatch = fromJust maybeFractionEvalueMatch
               let dbSize = computeDataBaseSize (e_val fractionEvalueMatch) (bits fractionEvalueMatch) (fromIntegral queryLength ::Double)
               CE.evaluate (SearchResult fullSequences (Just dbSize))
     else CE.evaluate (SearchResult [] Nothing)  

-- |Computes size of blast db in Mb 
computeDataBaseSize :: Double -> Double -> Double -> Double 
computeDataBaseSize evalue bitscore querylength = ((evalue * 2 ** bitscore) / querylength)/10^(6 :: Integer)

alignCandidates :: StaticOptions -> ModelConstruction -> String -> SearchResult -> IO ([(Sequence,Int,L.ByteString)],[(Sequence,Int,L.ByteString)])
alignCandidates staticOptions modelConstruction multipleSearchResultPrefix searchResults = do
  let iterationDirectory = tempDirPath staticOptions ++ show (iterationNumber modelConstruction) ++ "/" ++ multipleSearchResultPrefix
  createDirectoryIfMissing False (iterationDirectory ++ "log")
  if null (candidates searchResults)
    then do
      writeFile (iterationDirectory ++ "log" ++ "/11candidates") ("No candidates to align")
      return ([],[])
    else do
      --refilter for similarity
      writeFile (iterationDirectory ++ "log" ++ "/11candidates") (showlines (candidates searchResults))
      let alignedSequences = map snd (V.toList (extractAlignedSequences (iterationNumber modelConstruction) modelConstruction))
      let filteredCandidates = filterWithCollectedSequences (candidates searchResults) alignedSequences 99
      writeFile (iterationDirectory ++ "log" ++ "/12candidatesFilteredByCollected") (showlines filteredCandidates)
      if alignmentModeInfernal modelConstruction
        then alignCandidatesInfernalMode staticOptions modelConstruction multipleSearchResultPrefix (blastDatabaseSize searchResults) filteredCandidates
        else alignCandidatesInitialMode staticOptions modelConstruction multipleSearchResultPrefix filteredCandidates

alignCandidatesInfernalMode :: StaticOptions -> ModelConstruction -> String -> Maybe Double -> [(Sequence,Int,L.ByteString)] -> IO ([(Sequence,Int,L.ByteString)],[(Sequence,Int,L.ByteString)])
alignCandidatesInfernalMode staticOptions modelConstruction multipleSearchResultPrefix blastDbSize filteredCandidates = do
  let iterationDirectory = tempDirPath staticOptions ++ show (iterationNumber modelConstruction) ++ "/" ++ multipleSearchResultPrefix
  let candidateSequences = extractCandidateSequences filteredCandidates 
  logVerboseMessage (verbositySwitch staticOptions) "Alignment Mode Infernal\n" (tempDirPath staticOptions)
  let indexedCandidateSequenceList = (V.toList candidateSequences)
  let cmSearchFastaFilePaths = map (constructFastaFilePaths iterationDirectory) indexedCandidateSequenceList
  let cmSearchFilePaths = map (constructCMsearchFilePaths iterationDirectory) indexedCandidateSequenceList
  let covarianceModelPath = tempDirPath staticOptions ++ show (iterationNumber modelConstruction - 1) ++ "/" ++ "model.cm"
  mapM_ (\(number,_nucleotideSequence) -> writeFasta (iterationDirectory ++ show number ++ ".fa") [_nucleotideSequence]) indexedCandidateSequenceList
  let zippedFastaCMSearchResultPaths = zip cmSearchFastaFilePaths cmSearchFilePaths  
  --check with cmSearch
  mapM_ (uncurry (systemCMsearch (cpuThreads staticOptions) ("-Z " ++ show (fromJust blastDbSize)) covarianceModelPath)) zippedFastaCMSearchResultPaths
  cmSearchResults <- mapM readCMSearch cmSearchFilePaths 
  writeFile (iterationDirectory ++ "cm_error") (concatMap show (lefts cmSearchResults))
  let rightCMSearchResults = rights cmSearchResults 
  let cmSearchCandidatesWithSequences = zip rightCMSearchResults filteredCandidates    
  let (trimmedSelectedCandidates,potentialCandidates,rejectedCandidates) = evaluePartitionTrimCMsearchHits (evalueThreshold modelConstruction) cmSearchCandidatesWithSequences
  createDirectoryIfMissing False (iterationDirectory ++ "log")
  writeFile (iterationDirectory ++ "log" ++ "/13selectedCandidates'") (showlines trimmedSelectedCandidates)
  writeFile (iterationDirectory ++ "log" ++ "/14rejectedCandidates'") (showlines rejectedCandidates)
  writeFile (iterationDirectory ++ "log" ++ "/15potentialCandidates'") (showlines potentialCandidates)                                         
  CE.evaluate (map snd trimmedSelectedCandidates,map snd potentialCandidates)

alignCandidatesInitialMode :: StaticOptions -> ModelConstruction -> String -> [(Sequence,Int,L.ByteString)] -> IO ([(Sequence,Int,L.ByteString)],[(Sequence,Int,L.ByteString)])
alignCandidatesInitialMode staticOptions modelConstruction multipleSearchResultPrefix filteredCandidates = do
  let iterationDirectory = tempDirPath staticOptions ++ show (iterationNumber modelConstruction) ++ "/" ++ multipleSearchResultPrefix
  --writeFile (iterationDirectory ++ "log" ++ "/11bcandidates") (showlines filteredCandidates)
  createDirectoryIfMissing False (iterationDirectory ++ "log")
  let candidateSequences = extractCandidateSequences filteredCandidates
  --writeFile (iterationDirectory ++ "log" ++ "/11ccandidates") (showlines (V.toList candidateSequences))
  logVerboseMessage (verbositySwitch staticOptions) "Alignment Mode Initial\n" (tempDirPath staticOptions)
  --write Fasta sequences
  let inputFastaFilepath = iterationDirectory ++ "input.fa"
  let inputFoldFilepath = iterationDirectory ++ "input.fold"
  writeFasta (iterationDirectory ++ "input.fa") ([inputFasta modelConstruction])
  logMessage (showlines (V.toList candidateSequences)) (tempDirPath staticOptions)
  V.mapM_ (\(number,nucleotideSequence') -> writeFasta (iterationDirectory ++ show number ++ ".fa") [nucleotideSequence']) candidateSequences
  let candidateFastaFilepath = V.toList (V.map (\(number,_) -> iterationDirectory ++ show number ++ ".fa") candidateSequences)
  let candidateFoldFilepath = V.toList (V.map (\(number,_) -> iterationDirectory ++ show number ++ ".fold") candidateSequences)
  let locarnainClustalw2FormatFilepath =  V.toList (V.map (\(number,_) -> iterationDirectory ++ show number ++ "." ++ "clustalmlocarna") candidateSequences)
  let candidateAliFoldFilepath = V.toList (V.map (\(number,_) -> iterationDirectory ++ show number ++ ".alifold") candidateSequences)
  let locarnaFilepath = V.toList (V.map (\(number,_) -> iterationDirectory ++ show number ++ "." ++ "mlocarna") candidateSequences)
  alignSequences "locarna" " --write-structure --free-endgaps=++-- " (replicate (V.length candidateSequences) inputFastaFilepath) candidateFastaFilepath locarnainClustalw2FormatFilepath locarnaFilepath
  --compute SequenceIdentities
  let sequenceIdentities = V.map (\(_,s) -> sequenceIdentity (inputFasta modelConstruction) s/(100 :: Double)) candidateSequences
  --compute SCI
  systemRNAfold inputFastaFilepath inputFoldFilepath
  inputfoldResult <- readRNAfold inputFoldFilepath
  let inputFoldMFE = foldingEnergy (fromRight inputfoldResult)
  mapM_ (uncurry systemRNAfold) (zip candidateFastaFilepath candidateFoldFilepath)
  foldResults <- mapM readRNAfold candidateFoldFilepath
  let candidateMFEs = map (foldingEnergy . fromRight) foldResults
  let averageMFEs = map (\candidateMFE -> (candidateMFE + inputFoldMFE)/2) candidateMFEs
  mapM_ (uncurry (systemRNAalifold "")) (zip locarnainClustalw2FormatFilepath candidateAliFoldFilepath)
  alifoldResults <- mapM readRNAalifold candidateAliFoldFilepath
  let consensusMFE = map (alignmentConsensusMinimumFreeEnergy . fromRight) alifoldResults
  let sciidfraction = map (\(consMFE,averMFE,seqId) -> (consMFE/averMFE)/seqId) (zip3 consensusMFE averageMFEs (V.toList sequenceIdentities))
  let idlog = concatMap (\(sciidfraction',consMFE,averMFE,seqId) -> show sciidfraction' ++ "," ++ show consMFE ++ "," ++ show averMFE ++ "," ++ show seqId ++ "\n")(zip4 sciidfraction consensusMFE averageMFEs (V.toList sequenceIdentities))
  writeFile (iterationDirectory ++ "log" ++ "/idlog") (idlog)
  let alignedCandidates = zip sciidfraction filteredCandidates
  writeFile (iterationDirectory ++ "log" ++ "/zscores") (showlines alignedCandidates)
  let (selectedCandidates,rejectedCandidates) = partition (\(sciidfraction',_) -> sciidfraction' > nSCICutoff staticOptions) alignedCandidates
  mapM_ print (zip3 consensusMFE averageMFEs (V.toList sequenceIdentities))
  writeFile (iterationDirectory ++ "log" ++ "/13selectedCandidates") (showlines selectedCandidates)
  writeFile (iterationDirectory ++ "log" ++ "/14rejectedCandidates") (showlines rejectedCandidates)
  CE.evaluate (map snd selectedCandidates,[])

setClusterNumber :: Int -> Int
setClusterNumber x
  | x <= 5 = x 
  | otherwise = 5 

findCutoffforClusterNumber :: Dendrogram a -> Int -> Distance -> Distance                
findCutoffforClusterNumber clustaloDendrogram numberOfClusters currentCutoff
  | currentClusterNumber >= numberOfClusters = currentCutoff
  | otherwise = findCutoffforClusterNumber clustaloDendrogram numberOfClusters (currentCutoff-0.01)
    where currentClusterNumber = length (cutAt clustaloDendrogram currentCutoff)

-- Selects Query sequence ids from all collected seqeuences. Queries are then fetched by extractQueries function.
selectQueries :: StaticOptions -> ModelConstruction -> [(Sequence,Int,L.ByteString)] -> IO [Sequence]
selectQueries staticOptions modelConstruction selectedCandidates = do
  logVerboseMessage (verbositySwitch staticOptions) "SelectQueries\n" (tempDirPath staticOptions)
  --Extract sequences from modelconstruction
  let alignedSequences = extractAlignedSequences (iterationNumber modelConstruction) modelConstruction 
  let candidateSequences = extractQueryCandidates selectedCandidates
  let iterationDirectory = tempDirPath staticOptions ++ show (iterationNumber modelConstruction) ++ "/"
  let stockholmFilepath = iterationDirectory ++ "model" ++ ".stockholm"
  let alignmentSequences = map snd (V.toList (V.concat [candidateSequences,alignedSequences]))
  if length alignmentSequences > 3
    then do
      if (querySelectionMethod staticOptions) == "clustering"
        then do
          --write Fasta sequences
          writeFasta (iterationDirectory ++ "query" ++ ".fa") alignmentSequences
          let fastaFilepath = iterationDirectory ++ "query" ++ ".fa"
          let clustaloFilepath = iterationDirectory ++ "query" ++ ".clustalo"
          let clustaloDistMatrixPath = iterationDirectory ++ "query" ++ ".matrix" 
          alignSequences "clustalo" ("--full --distmat-out=" ++ clustaloDistMatrixPath ++ " ") [fastaFilepath] [] [clustaloFilepath] []
          idsDistancematrix <- readClustaloDistMatrix clustaloDistMatrixPath
          logEither idsDistancematrix (tempDirPath staticOptions)
          let (clustaloIds,clustaloDistMatrix) = fromRight idsDistancematrix
          logVerboseMessage (verbositySwitch staticOptions) ("Clustalid: " ++ intercalate "," clustaloIds ++ "\n") (tempDirPath staticOptions)
          logVerboseMessage (verbositySwitch staticOptions) ("Distmatrix: " ++ show clustaloDistMatrix ++ "\n") (tempDirPath staticOptions)
          let clustaloDendrogram = dendrogram UPGMA clustaloIds (getDistanceMatrixElements clustaloIds clustaloDistMatrix)
          logVerboseMessage (verbositySwitch staticOptions) ("ClustaloDendrogram: " ++ show  clustaloDendrogram ++ "\n") (tempDirPath staticOptions)
          logVerboseMessage (verbositySwitch staticOptions) ("ClustaloDendrogram: " ++ show clustaloDistMatrix ++ "\n") (tempDirPath staticOptions)
          let numberOfClusters = setClusterNumber (length alignmentSequences)
          logVerboseMessage (verbositySwitch staticOptions) ("numberOfClusters: " ++ show numberOfClusters ++ "\n") (tempDirPath staticOptions)
          let dendrogramStartCutDistance = 1 :: Double
          let dendrogramCutDistance' = findCutoffforClusterNumber clustaloDendrogram numberOfClusters dendrogramStartCutDistance
          logVerboseMessage (verbositySwitch staticOptions) ("dendrogramCutDistance': " ++ show dendrogramCutDistance' ++ "\n") (tempDirPath staticOptions)
          let cutDendrogram = cutAt clustaloDendrogram dendrogramCutDistance'
          --putStrLn "cutDendrogram: "
          --print cutDendrogram
          let currentSelectedSequenceIds = map L.pack (take (queryNumber staticOptions) (concatMap (take 1 . elements) cutDendrogram))
          --let alignedSequences = fastaSeqData:map nucleotideSequence (concatMap sequenceRecords (taxRecords modelconstruction))
          let fastaSelectedSequences = concatMap (filterSequenceById alignmentSequences) currentSelectedSequenceIds
          stockholmSelectedSequences <- extractAlignmentSequencesByIds stockholmFilepath currentSelectedSequenceIds
          --Stockholm sequnces contain conservation annotation from cmalign in infernal mode
          let currentSelectedSequences = if (blastSoftmaskingToggle staticOptions) then stockholmSelectedSequences else fastaSelectedSequences
          --let currentSelectedQueries = concatMap (\querySeqId -> filter (\alignedSeq -> L.unpack (unSL (seqid alignedSeq)) == querySeqId) alignmentSequences) querySeqIds
          logVerboseMessage (verbositySwitch staticOptions) ("SelectedQueries: " ++ show currentSelectedSequences ++ "\n") (tempDirPath staticOptions)                       
          writeFile (tempDirPath staticOptions ++ show (iterationNumber modelConstruction) ++ "/log" ++ "/13selectedQueries") (showlines currentSelectedSequences)
          CE.evaluate currentSelectedSequences
        else do
          let fastaSelectedSequences = filterIdenticalSequences' alignmentSequences (95 :: Double)
          let currentSelectedSequenceIds = map (unSL . seqid) (take (queryNumber staticOptions) fastaSelectedSequences)
          stockholmSelectedSequences <- extractAlignmentSequencesByIds stockholmFilepath currentSelectedSequenceIds
          let currentSelectedSequences = if (blastSoftmaskingToggle staticOptions) then stockholmSelectedSequences else fastaSelectedSequences
          writeFile (tempDirPath staticOptions ++ show (iterationNumber modelConstruction) ++ "/log" ++ "/13selectedQueries") (showlines currentSelectedSequences)
          CE.evaluate currentSelectedSequences
    else return []


filterSequenceById :: [Sequence] -> L.ByteString-> [Sequence]
filterSequenceById alignmentSequences querySequenceId = filter (seqenceHasId querySequenceId) alignmentSequences

seqenceHasId :: L.ByteString -> Sequence -> Bool
seqenceHasId querySequenceId alignmentSequence = unSL (seqid alignmentSequence) == querySequenceId

constructModel :: ModelConstruction -> StaticOptions -> IO String
constructModel modelConstruction staticOptions = do
  --Extract sequences from modelconstruction
  let alignedSequences = extractAlignedSequences (iterationNumber modelConstruction) modelConstruction
  --The CM resides in the iteration directory where its input alignment originates from 
  let outputDirectory = tempDirPath staticOptions ++ show (iterationNumber modelConstruction - 1) ++ "/"
  let alignmentSequences = map snd (V.toList (V.concat [alignedSequences]))
  --write Fasta sequences
  writeFasta (outputDirectory ++ "model" ++ ".fa") alignmentSequences
  let fastaFilepath = outputDirectory ++ "model" ++ ".fa"
  let locarnaFilepath = outputDirectory ++ "model" ++ ".mlocarna"
  let stockholmFilepath = outputDirectory ++ "model" ++ ".stockholm"
  --- let reformatedClustalFilepath = outputDirectory ++ "model" ++ ".clustal.reformated"
  let updatedStructureStockholmFilepath = outputDirectory ++ "newstructuremodel" ++ ".stockholm"
  let cmalignCMFilepath = (tempDirPath staticOptions) ++ show (iterationNumber modelConstruction - 2) ++ "/" ++ "model" ++ ".cm"
  let cmFilepath = outputDirectory ++ "model" ++ ".cm"
  let cmCalibrateFilepath = outputDirectory ++ "model" ++ ".cmcalibrate"
  let cmBuildFilepath = outputDirectory ++ "model" ++ ".cmbuild"
  let alifoldFilepath = outputDirectory ++ "model" ++ ".alifold"
  let refinedAlignmentFilepath = outputDirectory ++ "modelrefined" ++ ".stockholm"
  let cmBuildOptions ="--refine " ++ refinedAlignmentFilepath
  if alignmentModeInfernal modelConstruction
     then do
       logVerboseMessage (verbositySwitch staticOptions) "Construct Model - infernal mode\n" (tempDirPath staticOptions)
       systemCMalign ("--cpu " ++ show (cpuThreads staticOptions)) cmalignCMFilepath fastaFilepath stockholmFilepath
       systemRNAalifold "-r --cfactor 0.6 --nfactor 0.5" stockholmFilepath alifoldFilepath
       replaceStatus <- replaceStockholmStructure stockholmFilepath alifoldFilepath updatedStructureStockholmFilepath
       if null replaceStatus
         then do
           systemCMbuild cmBuildOptions updatedStructureStockholmFilepath cmFilepath cmBuildFilepath
           systemCMcalibrate "fast" (cpuThreads staticOptions) cmFilepath cmCalibrateFilepath
           return cmFilepath
         else do
           logWarning ("Warning: A problem occured updating the secondary structure of iteration " ++ show (iterationNumber modelConstruction)  ++ " stockholm alignment: " ++ replaceStatus) (tempDirPath staticOptions)
           systemCMbuild cmBuildOptions stockholmFilepath cmFilepath cmBuildFilepath
           systemCMcalibrate "fast" (cpuThreads staticOptions) cmFilepath cmCalibrateFilepath
           return cmFilepath
     else do
       logVerboseMessage (verbositySwitch staticOptions) "Construct Model - initial mode\n" (tempDirPath staticOptions)
       alignSequences "mlocarna" ("--threads=" ++ show (cpuThreads staticOptions) ++ " ") [fastaFilepath] [] [locarnaFilepath] []
       mlocarnaAlignment <- readStructuralClustalAlignment locarnaFilepath
       logEither mlocarnaAlignment (tempDirPath staticOptions)
       let stockholAlignment = convertClustaltoStockholm (fromRight mlocarnaAlignment)
       writeFile stockholmFilepath stockholAlignment
       _ <- systemCMbuild cmBuildOptions stockholmFilepath cmFilepath cmBuildFilepath
       _ <- systemCMcalibrate "fast" (cpuThreads staticOptions) cmFilepath cmCalibrateFilepath
       return cmFilepath

-- | Replaces structure of input stockholm file with the consensus structure of alifoldFilepath and outputs updated stockholmfile
replaceStockholmStructure :: String -> String -> String -> IO String
replaceStockholmStructure stockholmFilepath alifoldFilepath updatedStructureStockholmFilepath = do
  inputAln <- readFile stockholmFilepath
  inputRNAalifold <- readRNAalifold alifoldFilepath
  if isLeft inputRNAalifold
    then do
     return (show (fromLeft inputRNAalifold))
    else do
     let alifoldstructure = alignmentConsensusDotBracket (fromRight inputRNAalifold)
     let seedLinesVector = V.fromList (lines inputAln)
     let structureIndices = V.toList (V.findIndices isStructureLine seedLinesVector)
     let updatedStructureElements = updateStructureElements seedLinesVector alifoldstructure structureIndices
     let newVector = seedLinesVector V.// updatedStructureElements
     let newVectorString = concatMap (++ "\n") (V.toList newVector)
     writeFile updatedStructureStockholmFilepath newVectorString
     return []

updateStructureElements :: V.Vector String -> String -> [Int] -> [(Int,String)]
updateStructureElements inputVector structureString indices
  | null indices = []
  | otherwise = newElement ++ updateStructureElements inputVector (drop structureLength structureString) (tail indices)
  where currentIndex = head indices
        currentElement = inputVector V.! currentIndex
        elementLength = length currentElement
        structureStartIndex = maximum (elemIndices ' ' currentElement) + 1
        structureLength = elementLength - structureStartIndex
        newElementHeader = take structureStartIndex currentElement
        newElementStructure = take structureLength structureString
        newElement = [(currentIndex,newElementHeader ++ newElementStructure)]

isStructureLine :: String -> Bool
isStructureLine input = "#=GC SS_cons" `isInfixOf` input

-- Generates iteration string for Log
iterationSummaryLog :: ModelConstruction -> String
iterationSummaryLog mC = output
  where upperTaxonomyLimitOutput = maybe "not set" show (upperTaxonomyLimit mC)
        output = "Upper taxonomy id limit: " ++ upperTaxonomyLimitOutput ++ ", Collected members: " ++ show (length (concatMap sequenceRecords (taxRecords mC))) ++ "\n"
             
-- | Used for passing progress to Alien server 
iterationSummary :: ModelConstruction -> StaticOptions -> IO()
iterationSummary mC sO = do
  --iteration -- tax limit -- bitscore cutoff -- blastresult -- aligned seqs --queries --fa link --aln link --cm link
  let upperTaxonomyLimitOutput = maybe "not set" show (upperTaxonomyLimit mC)
  let output = show (iterationNumber mC) ++ "," ++ upperTaxonomyLimitOutput ++ "," ++ show (length (concatMap sequenceRecords (taxRecords mC)))
  writeFile ((tempDirPath sO) ++ "/log/" ++ show (iterationNumber mC) ++ ".log") output        

-- | Used for passing progress to Alien server 
resultSummary :: ModelConstruction -> StaticOptions -> IO()
resultSummary mC sO = do
  --iteration -- tax limit -- bitscore cutoff -- blastresult -- aligned seqs --queries --fa link --aln link --cm link
  let upperTaxonomyLimitOutput = maybe "not set" show (upperTaxonomyLimit mC)
  let output = show (iterationNumber mC) ++ "," ++ upperTaxonomyLimitOutput ++ "," ++ show (length (concatMap sequenceRecords (taxRecords mC)))
  writeFile (tempDirPath sO ++ "/log/result" ++ ".log") output        
                       
readClustaloDistMatrix :: String -> IO (Either ParseError ([String],Matrix Double))             
readClustaloDistMatrix = parseFromFile genParserClustaloDistMatrix 
                      
genParserClustaloDistMatrix :: GenParser Char st ([String],Matrix Double)
genParserClustaloDistMatrix = do
  _ <- many1 digit
  newline
  clustaloDistRow <- many1 (try genParserClustaloDistRow) 
  eof
  return (map fst clustaloDistRow,fromLists (map snd clustaloDistRow))

genParserClustaloDistRow :: GenParser Char st (String,[Double])
genParserClustaloDistRow = do
  entryId <- many1 (noneOf " ")
  many1 space
  distances <- many1 (try genParserClustaloDistance)
  newline
  return (entryId,distances)

genParserClustaloDistance :: GenParser Char st Double
genParserClustaloDistance = do
  distance <- many1 (oneOf "1234567890.")
  optional (try (char ' ' ))
  return (readDouble distance)

getDistanceMatrixElements :: [String] -> Matrix Double -> String -> String -> Double
getDistanceMatrixElements ids distMatrix id1 id2 = distance
  -- Data.Matrix is indexed starting with 1
  where indexid1 = fromJust (elemIndex id1 ids) + 1
        indexid2 = fromJust (elemIndex id2 ids) + 1
        distance = getElem indexid1 indexid2 distMatrix

-- | Filter duplicates removes hits in sequences that were already collected. This happens during revisiting the starting subtree.
filterDuplicates :: ModelConstruction -> SearchResult -> SearchResult
filterDuplicates modelConstruction inputSearchResult = uniqueSearchResult
  where alignedSequences = map snd (V.toList (extractAlignedSequences (iterationNumber modelConstruction) modelConstruction))
        collectedIdentifiers = map seqid alignedSequences
        uniques = filter (\(s,_,_) -> notElem (seqid s) collectedIdentifiers) (candidates inputSearchResult)
        uniqueSearchResult = SearchResult uniques (blastDatabaseSize inputSearchResult)

-- | Filter a list of similar extended blast hits   
--filterIdenticalSequencesWithOrigin :: [(Sequence,Int,String,Char)] -> Double -> [(Sequence,Int,String,Char)]                            
--filterIdenticalSequencesWithOrigin (headSequence:rest) identitycutoff = result
--  where filteredSequences = filter (\x -> (sequenceIdentity (firstOfQuadruple headSequence) (firstOfQuadruple x)) < identitycutoff) rest 
--        result = headSequence:(filterIdenticalSequencesWithOrigin filteredSequences identitycutoff)
--filterIdenticalSequencesWithOrigin [] _ = []

-- | Filter a list of similar extended blast hits   
filterIdenticalSequences :: [(Sequence,Int,L.ByteString)] -> Double -> [(Sequence,Int,L.ByteString)] 
filterIdenticalSequences (headSequence:rest) identitycutoff = result
  where filteredSequences = filter (\x -> sequenceIdentity (firstOfTriple headSequence) (firstOfTriple x) < identitycutoff) rest 
        result = headSequence:filterIdenticalSequences filteredSequences identitycutoff
filterIdenticalSequences [] _ = []

-- | Filter sequences too similar to already aligned sequences
filterWithCollectedSequences :: [(Sequence,Int,L.ByteString)] -> [Sequence] -> Double -> [(Sequence,Int,L.ByteString)]                            
filterWithCollectedSequences inputCandidates collectedSequences identitycutoff = filter (isUnSimilarSequence collectedSequences identitycutoff . firstOfTriple) inputCandidates 
--filterWithCollectedSequences [] [] _ = []

-- | Filter alignment entries by similiarity  
filterIdenticalSequences' :: [Sequence] -> Double -> [Sequence]
filterIdenticalSequences' (headEntry:rest) identitycutoff = result
  where filteredEntries = filter (\ x -> (sequenceIdentity headEntry x) < identitycutoff) rest
        result = headEntry:filterIdenticalSequences' filteredEntries identitycutoff
filterIdenticalSequences' [] _ = []

-- | Filter alignment entries by similiarity  
filterIdenticalAlignmentEntry :: [ClustalAlignmentEntry] -> Double -> [ClustalAlignmentEntry]
filterIdenticalAlignmentEntry (headEntry:rest) identitycutoff = result
  where filteredEntries = filter (\x -> (stringIdentity (entryAlignedSequence headEntry) (entryAlignedSequence x)) < identitycutoff) rest
        result = headEntry:filterIdenticalAlignmentEntry filteredEntries identitycutoff
filterIdenticalAlignmentEntry [] _ = []

isUnSimilarSequence :: [Sequence] -> Double -> Sequence -> Bool
isUnSimilarSequence collectedSequences identitycutoff checkSequence = any (\ x -> (sequenceIdentity checkSequence x) < identitycutoff) collectedSequences
                
firstOfTriple :: (t, t1, t2) -> t
firstOfTriple (a,_,_) = a 

-- | Check if the result field of BlastResult is filled and if hits are present
blastMatchesPresent :: BlastResult -> Bool
blastMatchesPresent blastResult 
  | null resultList = False
  | otherwise = True
  where resultList = concatMap matches (concatMap hits (results blastResult))
                                
-- | Compute identity of sequences
stringIdentity :: String -> String -> Double
stringIdentity string1 string2 = identityPercent
   where distance = ED.levenshteinDistance ED.defaultEditCosts string1 string2
         maximumDistance = maximum [length string1,length string2]
         identityPercent = 100 - ((fromIntegral distance/fromIntegral maximumDistance) * (read "100" ::Double))

-- | Compute identity of sequences
sequenceIdentity :: Sequence -> Sequence -> Double
sequenceIdentity sequence1 sequence2 = identityPercent
  where distance = ED.levenshteinDistance ED.defaultEditCosts sequence1string sequence2string
        sequence1string = L.unpack (unSD (seqdata sequence1))
        sequence2string = L.unpack (unSD (seqdata sequence2))
        maximumDistance = maximum [length sequence1string,length sequence2string]
        identityPercent = 100 - ((fromIntegral distance/fromIntegral maximumDistance) * (read "100" ::Double))

getTaxonomicContextEntrez :: Maybe Int -> Maybe Taxon -> IO (Maybe Taxon)
getTaxonomicContextEntrez upperTaxLimit currentTaxonomicContext = do
  if isJust upperTaxLimit
      then if isJust currentTaxonomicContext
        then return currentTaxonomicContext
        else retrieveTaxonomicContextEntrez (fromJust upperTaxLimit)
          --return retrievedTaxonomicContext
    else return Nothing

setTaxonomicContextEntrez :: Int -> Maybe Taxon -> Maybe Int -> (Maybe Int, Maybe Int)
setTaxonomicContextEntrez currentIterationNumber currentTaxonomicContext subTreeTaxId 
  | currentIterationNumber == 0 = (subTreeTaxId, Nothing)
  | otherwise = setUpperLowerTaxLimitEntrez (fromJust subTreeTaxId) (fromJust currentTaxonomicContext)
                          
-- setTaxonomic Context for next candidate search, the upper bound of the last search become the lower bound of the next
setUpperLowerTaxLimitEntrez :: Int -> Taxon -> (Maybe Int, Maybe Int) 
setUpperLowerTaxLimitEntrez subTreeTaxId currentTaxonomicContext = (upperLimit,lowerLimit)
  where upperLimit = raiseTaxIdLimitEntrez subTreeTaxId currentTaxonomicContext
        lowerLimit = Just subTreeTaxId

raiseTaxIdLimitEntrez :: Int -> Taxon -> Maybe Int
raiseTaxIdLimitEntrez subTreeTaxId taxon = parentNodeTaxId
  where lastUpperBoundNodeIndex = fromJust (V.findIndex  (\node -> lineageTaxId node == subTreeTaxId) lineageExVector)
        linageNodeTaxId = Just (lineageTaxId (lineageExVector V.! (lastUpperBoundNodeIndex -1)))
        lineageExVector = V.fromList (lineageEx taxon)
        --the input taxid is not part of the lineage, therefor we look for further taxids in the lineage after we used the parent tax id of the input node
        parentNodeTaxId = if subTreeTaxId == taxonTaxId taxon then Just (taxonParentTaxId taxon) else linageNodeTaxId
       
constructNext :: Int -> ModelConstruction -> [(Sequence,Int,L.ByteString)] -> Maybe Int -> Maybe Taxon  -> [Sequence] -> [SearchResult] -> Bool -> ModelConstruction
constructNext currentIterationNumber modelconstruction alignmentResults upperTaxLimit inputTaxonomicContext inputSelectedQueries inputPotentialMembers toggleInfernalAlignmentModeTrue = nextModelConstruction
  where newIterationNumber = currentIterationNumber + 1
        taxEntries = taxRecords modelconstruction ++ buildTaxRecords alignmentResults currentIterationNumber
        potMembers = potentialMembers modelconstruction ++ inputPotentialMembers
        currentAlignmentMode = toggleInfernalAlignmentModeTrue || alignmentModeInfernal modelconstruction
        nextModelConstruction = ModelConstruction newIterationNumber (inputFasta modelconstruction) taxEntries upperTaxLimit inputTaxonomicContext (evalueThreshold modelconstruction) currentAlignmentMode inputSelectedQueries potMembers
         
buildTaxRecords :: [(Sequence,Int,L.ByteString)] -> Int -> [TaxonomyRecord]
buildTaxRecords alignmentResults currentIterationNumber = taxonomyRecords
  where taxIdGroups = groupBy sameTaxIdAlignmentResult alignmentResults
        taxonomyRecords = map (buildTaxRecord currentIterationNumber) taxIdGroups    

sameTaxIdAlignmentResult :: (Sequence,Int,L.ByteString) -> (Sequence,Int,L.ByteString) -> Bool
sameTaxIdAlignmentResult (_,taxId1,_) (_,taxId2,_) = taxId1 == taxId2

buildTaxRecord :: Int -> [(Sequence,Int,L.ByteString)] -> TaxonomyRecord
buildTaxRecord currentIterationNumber entries = taxRecord
  where recordTaxId = (\(_,currentTaxonomyId,_) -> currentTaxonomyId) (head entries)
        seqRecords = map (buildSeqRecord currentIterationNumber)  entries
        taxRecord = TaxonomyRecord recordTaxId seqRecords

buildSeqRecord :: Int -> (Sequence,Int,L.ByteString) -> SequenceRecord 
buildSeqRecord currentIterationNumber (parsedFasta,_,seqSubject) = SequenceRecord parsedFasta currentIterationNumber seqSubject    

-- | Partitions sequences by containing a cmsearch hit and extracts the hit region as new sequence
evaluePartitionTrimCMsearchHits :: Double -> [(CMsearch,(Sequence, Int, L.ByteString))] -> ([(CMsearch,(Sequence, Int, L.ByteString))],[(CMsearch,(Sequence, Int, L.ByteString))],[(CMsearch,(Sequence, Int, L.ByteString))])
evaluePartitionTrimCMsearchHits eValueThreshold cmSearchCandidatesWithSequences = (trimmedSelectedCandidates,potentialCandidates,rejectedCandidates)
  where potentialMemberseValueThreshold = eValueThreshold * 1000
        (prefilteredCandidates,rejectedCandidates) = partition (\(cmSearchResult,_) -> any (\hitScore' -> potentialMemberseValueThreshold >= hitEvalue hitScore') (cmsearchHits cmSearchResult)) cmSearchCandidatesWithSequences
        (selectedCandidates,potentialCandidates) = partition (\(cmSearchResult,_) -> any (\hitScore' -> eValueThreshold >= hitEvalue hitScore') (cmsearchHits cmSearchResult)) prefilteredCandidates
        trimmedSelectedCandidates = map (\(cmSearchResult,inputSequence) -> (cmSearchResult,trimCMsearchHit cmSearchResult inputSequence)) selectedCandidates
        
        
trimCMsearchHit :: CMsearch -> (Sequence, Int, L.ByteString) -> (Sequence, Int, L.ByteString)
trimCMsearchHit cmSearchResult (inputSequence,b,c) = (subSequence,b,c)
  where hitScoreEntry = head (cmsearchHits cmSearchResult)
        sequenceString = L.unpack (unSD (seqdata inputSequence))
        sequenceSubstring = cmSearchsubString (hitStart hitScoreEntry) (hitEnd hitScoreEntry) sequenceString
        --extend original seqheader
        newSequenceHeader =  L.pack (L.unpack (unSL (seqheader inputSequence)) ++ "cmS_" ++ show (hitStart hitScoreEntry) ++ "_" ++ show (hitEnd hitScoreEntry) ++ "_" ++ show (hitStrand hitScoreEntry))
        subSequence = Seq (SeqLabel newSequenceHeader) (SeqData (L.pack sequenceSubstring)) Nothing

-- | Extract a substring with coordinates from cmsearch, first nucleotide has index 1
cmSearchsubString :: Int -> Int -> String -> String
cmSearchsubString startSubString endSubString inputString 
  | startSubString < endSubString = take (endSubString - (startSubString -1))(drop (startSubString - 1) inputString)
  | startSubString > endSubString = take (reverseEnd - (reverseStart - 1))(drop (reverseStart - 1 ) (reverse inputString))
  | otherwise = take (endSubString - (startSubString -1))(drop (startSubString - 1) inputString)
  where stringLength = length inputString
        reverseStart = stringLength - (startSubString + 1)
        reverseEnd = stringLength - (endSubString - 1)
                     
extractQueries :: Int -> ModelConstruction -> [Sequence] 
extractQueries foundSequenceNumber modelconstruction
  | foundSequenceNumber < 3 = [fastaSeqData] 
  | otherwise = querySequences' 
  where fastaSeqData = inputFasta modelconstruction
        querySequences' = selectedQueries modelconstruction
        
extractQueryCandidates :: [(Sequence,Int,L.ByteString)] -> V.Vector (Int,Sequence)
extractQueryCandidates querycandidates = indexedSeqences
  where sequences = map (\(candidateSequence,_,_) -> candidateSequence) querycandidates
        indexedSeqences = V.map (\(number,candidateSequence) -> (number + 1,candidateSequence))(V.indexed (V.fromList sequences))

buildTaxFilterQuery :: Maybe Int -> Maybe Int -> String
buildTaxFilterQuery upperTaxLimit lowerTaxLimit
  | isNothing upperTaxLimit = ""
  | isNothing lowerTaxLimit =  "&ENTREZ_QUERY=" ++ encodedTaxIDQuery (fromJust upperTaxLimit)
  | otherwise = "&ENTREZ_QUERY=" ++ "%28txid" ++ show (fromJust upperTaxLimit)  ++ "%5BORGN%5D%29" ++ "NOT" ++ "%28txid" ++ show (fromJust lowerTaxLimit) ++ "%5BORGN%5D&EQ_OP%29"
 
buildHitNumberQuery :: String -> String
buildHitNumberQuery hitNumber
  | hitNumber == "" = ""
  | otherwise = "&ALIGNMENTS=" ++ hitNumber

encodedTaxIDQuery :: Int -> String
encodedTaxIDQuery taxID = "txid" ++ show taxID ++ "%20%5BORGN%5D&EQ_OP"

-- | Adds cm prefix to pseudo random number
randomid :: Int16 -> String
randomid number = "cm" ++ show number

-- | Create session id for RNAlien
createSessionID :: Maybe String -> IO String
createSessionID sessionIdentificator = do
  if isJust sessionIdentificator
    then return (fromJust sessionIdentificator)
    else do
      randomNumber <- randomIO :: IO Int16
      let sessionId = randomid (abs (randomNumber))
      return sessionId
                           
-- | Run external locarna command and read the output into the corresponding datatype
systemlocarna :: String -> (String,String,String,String) -> IO ExitCode
systemlocarna options (inputFilePath1, inputFilePath2, clustalformatoutputFilePath, outputFilePath) = system ("locarna " ++ options ++ " --clustal=" ++ clustalformatoutputFilePath  ++ " " ++ inputFilePath1  ++ " " ++ inputFilePath2 ++ " > " ++ outputFilePath)

-- | Run external mlocarna command and read the output into the corresponding datatype, there is also a folder created at the location of the input fasta file
systemMlocarna :: String -> (String,String) -> IO ExitCode
systemMlocarna options (inputFilePath, outputFilePath) = system ("mlocarna " ++ options ++ " " ++ inputFilePath ++ " > " ++ outputFilePath)
 
-- | Run external mlocarna command and read the output into the corresponding datatype, there is also a folder created at the location of the input fasta file, the job is terminated after the timeout provided in seconds
systemMlocarnaWithTimeout :: String -> String -> (String,String) -> IO ExitCode
systemMlocarnaWithTimeout timeout options (inputFilePath, outputFilePath) = system ("timeout " ++ timeout ++"s "++ "mlocarna " ++ options ++ " " ++ inputFilePath ++ " > " ++ outputFilePath)
       
-- | Run external clustalo command and return the Exitcode
systemClustalw2 :: String -> (String,String,String) -> IO ExitCode
systemClustalw2 options (inputFilePath, outputFilePath, summaryFilePath) = system ("clustalw2 " ++ options ++ "-INFILE=" ++ inputFilePath ++ " -OUTFILE=" ++ outputFilePath ++ ">" ++ summaryFilePath)

-- | Run external clustalo command and return the Exitcode
systemClustalo :: String -> (String,String) -> IO ExitCode
systemClustalo options (inputFilePath, outputFilePath) = system ("clustalo " ++ options ++ "--infile=" ++ inputFilePath ++ " >" ++ outputFilePath)

-- | Run external CMbuild command and read the output into the corresponding datatype 
systemCMbuild ::  String -> String -> String -> String -> IO ExitCode
systemCMbuild options alignmentFilepath modelFilepath outputFilePath = system ("cmbuild " ++ options ++ " " ++ modelFilepath ++ " " ++ alignmentFilepath  ++ " > " ++ outputFilePath)  
                                       
-- | Run CMCompare and read the output into the corresponding datatype
systemCMcompare ::  String -> String -> String -> IO ExitCode
systemCMcompare model1path model2path outputFilePath = system ("CMCompare -q " ++ model1path ++ " " ++ model2path ++ " >" ++ outputFilePath)

-- | Run CMsearch 
systemCMsearch :: Int -> String -> String -> String -> String -> IO ExitCode
systemCMsearch cpus options covarianceModelPath sequenceFilePath outputPath = system ("cmsearch --notrunc --cpu " ++ show cpus ++ " " ++ options ++ " -g " ++ covarianceModelPath ++ " " ++ sequenceFilePath ++ "> " ++ outputPath)

-- | Run CMstat
systemCMstat :: String -> String -> IO ExitCode
systemCMstat covarianceModelPath outputPath = system ("cmstat " ++ covarianceModelPath ++ " > " ++ outputPath)

-- | Run CMcalibrate and return exitcode
systemCMcalibrate :: String -> Int -> String -> String -> IO ExitCode 
systemCMcalibrate mode cpus covarianceModelPath outputPath 
  | mode == "fast" = system ("cmcalibrate --beta 1E-4 --cpu " ++ show cpus ++ " " ++ covarianceModelPath ++ "> " ++ outputPath)
  | otherwise = system ("cmcalibrate --cpu " ++ show cpus ++ " " ++ covarianceModelPath ++ "> " ++ outputPath)


-- | Run CMcalibrate and return exitcode
systemCMalign :: String -> String -> String -> String -> IO ExitCode 
systemCMalign options filePathCovarianceModel filePathSequence filePathAlignment = system ("cmalign " ++ options ++ " " ++ filePathCovarianceModel ++ " " ++ filePathSequence ++ "> " ++ filePathAlignment)

compareCM :: String -> String -> String -> IO (Either String Double)
compareCM rfamCMPath resultCMpath outputDirectory = do
  let myOptions = defaultDecodeOptions {
      decDelimiter = fromIntegral (ord ' ')
  }
  let rfamCMFileName = FP.takeBaseName rfamCMPath
  let resultCMFileName = FP.takeBaseName resultCMpath
  let cmcompareResultPath = outputDirectory ++ rfamCMFileName ++ resultCMFileName ++ ".cmcompare"
  _ <- systemCMcompare rfamCMPath resultCMpath cmcompareResultPath
  inputCMcompare <- readFile cmcompareResultPath
  let singlespaceCMcompare = unwords(words inputCMcompare)
  let decodedCmCompareOutput = head (V.toList (fromRight (decodeWith myOptions NoHeader (L.pack singlespaceCMcompare) :: Either String (V.Vector [String]))))
  --two.cm   three.cm     27.996     19.500 CCCAAAGGGCCCAAAGGG (((...)))(((...))) (((...)))(((...))) [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17] [11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27]
  let bitscore1 = read (decodedCmCompareOutput !! 2) :: Double
  let bitscore2 = read (decodedCmCompareOutput !! 3) :: Double
  let minmax = minimum [bitscore1,bitscore2]
  return (Right minmax)
                                                                 
readInt :: String -> Int
readInt = read

readDouble :: String -> Double
readDouble = read
 
extractCandidateSequences :: [(Sequence,Int,L.ByteString)] -> V.Vector (Int,Sequence)
extractCandidateSequences candidates' = indexedSeqences
  where sequences = map (\(inputSequence,_,_) -> inputSequence) candidates'
        indexedSeqences = V.map (\(number,inputSequence) -> (number + 1,inputSequence))(V.indexed (V.fromList sequences))
        
extractAlignedSequences :: Int -> ModelConstruction ->  V.Vector (Int,Sequence)
extractAlignedSequences iterationnumber modelconstruction
  | iterationnumber == 0 =  V.map (\(number,seq') -> (number + 1,seq')) (V.indexed (V.fromList [inputSequence]))
  | otherwise = indexedSeqRecords
  where inputSequence = inputFasta modelconstruction
        seqRecordsperTaxrecord = map sequenceRecords (taxRecords modelconstruction)
        seqRecords = concat seqRecordsperTaxrecord
        --alignedSeqRecords = filter (\seqRec -> (aligned seqRec) > 0) seqRecords 
        indexedSeqRecords = V.map (\(number,seq') -> (number + 1,seq')) (V.indexed (V.fromList (inputSequence : map nucleotideSequence seqRecords)))

filterByParentTaxId :: [(BlastHit,Int)] -> Bool -> [(BlastHit,Int)]
filterByParentTaxId blastHitsWithParentTaxId singleHitPerParentTaxId   
  |  singleHitPerParentTaxId = singleBlastHitperParentTaxId
  |  otherwise = blastHitsWithParentTaxId
  where blastHitsWithParentTaxIdSortedByParentTaxId = sortBy compareTaxId blastHitsWithParentTaxId
        blastHitsWithParentTaxIdGroupedByParentTaxId = groupBy sameTaxId blastHitsWithParentTaxIdSortedByParentTaxId
        singleBlastHitperParentTaxId = map (maximumBy compareHitEValue) blastHitsWithParentTaxIdGroupedByParentTaxId

filterByHitLength :: [BlastHit] -> Int -> Bool -> [BlastHit]
filterByHitLength blastHits queryLength filterOn 
  | filterOn = filteredBlastHits
  | otherwise = blastHits
  where filteredBlastHits = filter (hitLengthCheck queryLength) blastHits

-- | Hits should have a compareable length to query
hitLengthCheck :: Int -> BlastHit -> Bool
hitLengthCheck queryLength blastHit = lengthStatus
  where  blastMatches = matches blastHit
         minHfrom = minimum (map h_from blastMatches)
         minHfromHSP = fromJust (find (\hsp -> minHfrom == h_from hsp) blastMatches)
         maxHto = maximum (map h_to blastMatches)
         maxHtoHSP = fromJust (find (\hsp -> maxHto == h_to hsp) blastMatches)
         minHonQuery = q_from minHfromHSP
         maxHonQuery = q_to maxHtoHSP
         startCoordinate = minHfrom - minHonQuery 
         endCoordinate = maxHto + (queryLength - maxHonQuery) 
         fullSeqLength = endCoordinate - startCoordinate
         lengthStatus = fullSeqLength < (queryLength * 3)

filterByCoverage :: [BlastHit] -> Int -> Bool -> [BlastHit]
filterByCoverage blastHits queryLength filterOn 
  | filterOn = filteredBlastHits
  | otherwise = blastHits
  where filteredBlastHits = filter (coverageCheck queryLength) blastHits

-- | Hits should have a compareable length to query
coverageCheck :: Int -> BlastHit -> Bool
coverageCheck queryLength blastHit = coverageStatus
  where  blastMatches = matches blastHit
         maxIdentity = fromIntegral (maximum (map (snd . Bio.BlastXML.identity) blastMatches))
         coverageStatus = (maxIdentity/(fromIntegral queryLength))* (100 :: Double) >= (80 :: Double)
         
-- | Wrapper for retrieveFullSequence that rerequests incomplete return sequees
retrieveFullSequences :: StaticOptions -> [(String,Int,Int,String,T.Text,Int,L.ByteString)] -> IO [(Sequence,Int,L.ByteString)]
retrieveFullSequences staticOptions requestedSequences = do
  fullSequences <- mapM (retrieveFullSequence (tempDirPath staticOptions)) requestedSequences
  if any (isNothing . firstOfTriple) fullSequences
    then do
      let fullSequencesWithRequestedSequences = zip fullSequences requestedSequences
      --let (failedRetrievals, successfulRetrievals) = partition (\x -> L.null (unSD (seqdata (firstOfTriple (fst x))))) fullSequencesWithRequestedSequences
      let (failedRetrievals, successfulRetrievals) = partition (isNothing . firstOfTriple . fst) fullSequencesWithRequestedSequences
      --we try to reretrieve failed entries once
      missingSequences <- mapM (retrieveFullSequence (tempDirPath staticOptions) .snd) failedRetrievals
      let (stillMissingSequences,reRetrievedSequences) = partition (isNothing . firstOfTriple) missingSequences
      logWarning ("Sequence retrieval failed: \n" ++ concatMap show stillMissingSequences ++ "\n") (tempDirPath staticOptions)
      let unwrappedRetrievals = map (\(x,y,z) -> (fromJust x,y,z))  (map fst successfulRetrievals ++ reRetrievedSequences)
      CE.evaluate unwrappedRetrievals
    else CE.evaluate (map (\(x,y,z) -> (fromJust x,y,z)) fullSequences)
        
retrieveFullSequence :: String -> (String,Int,Int,String,T.Text,Int,L.ByteString) -> IO (Maybe Sequence,Int,L.ByteString)
retrieveFullSequence temporaryDirectoryPath (nucleotideId,seqStart,seqStop,strand,_,taxid,subject') = do
  let program' = Just "efetch"
  let database' = Just "nucleotide"
  let registrationInfo = buildRegistration "RNAlien" "florian.eggenhofer@univie.ac.at"
  let queryString = "id=" ++ nucleotideId ++ "&seq_start=" ++ show seqStart ++ "&seq_stop=" ++ show seqStop ++ "&rettype=fasta" ++ "&strand=" ++ strand ++ registrationInfo
  let entrezQuery = EntrezHTTPQuery program' database' queryString 
  result <- CE.catch (entrezHTTP entrezQuery)
              (\e -> do let err = show (e :: CE.IOException)
                        logWarning ("Warning: Full sequence retrieval failed:" ++ " " ++ err) temporaryDirectoryPath
                        return [])
  if null result
    then return (Nothing,taxid,subject')
    else do
      if null ((mkSeqs . L.lines) (L.pack result))
        then return (Nothing,taxid,subject')
        else do
          let parsedFasta = head ((mkSeqs . L.lines) (L.pack result))
          if L.null (unSD (seqdata parsedFasta))
            then return (Nothing,taxid,subject')
            else CE.evaluate (Just parsedFasta,taxid,subject')
 
getRequestedSequenceElement :: Int -> (BlastHit,Int) -> (String,Int,Int,String,T.Text,Int,L.ByteString)
getRequestedSequenceElement queryLength (blastHit,taxid) 
  | blastHitIsReverseComplement (blastHit,taxid) = getReverseRequestedSequenceElement queryLength (blastHit,taxid)
  | otherwise = getForwardRequestedSequenceElement queryLength (blastHit,taxid)

blastHitIsReverseComplement :: (BlastHit,Int) -> Bool
blastHitIsReverseComplement (blastHit,_) = isReverse
  where blastMatch = head (matches blastHit)
        firstHSPfrom = h_from blastMatch
        firstHSPto = h_to blastMatch
        isReverse = firstHSPfrom > firstHSPto

getForwardRequestedSequenceElement :: Int -> (BlastHit,Int) -> (String,Int,Int,String,T.Text,Int,L.ByteString)
getForwardRequestedSequenceElement queryLength (blastHit,taxid) = (geneIdentifier',startcoordinate,endcoordinate,strand,accession',taxid,subjectBlast)
   where    accession' = extractAccession blastHit
            subjectBlast = unSL (subject blastHit)
            geneIdentifier' = extractGeneId blastHit
            blastMatch = head (matches blastHit)
            blastHitOriginSequenceLength = slength blastHit
            minHfrom = h_from blastMatch
            maxHto = h_to blastMatch
            minHonQuery = q_from blastMatch
            maxHonQuery = q_to blastMatch
            --unsafe coordinates may exceed length of available sequence
            unsafestartcoordinate = minHfrom - minHonQuery 
            unsafeendcoordinate = maxHto + (queryLength - maxHonQuery) 
            startcoordinate = lowerBoundryCoordinateSetter 0 unsafestartcoordinate
            endcoordinate = upperBoundryCoordinateSetter blastHitOriginSequenceLength unsafeendcoordinate 
            strand = "1"
            ---- 
            --blastMatches = matches blastHit
            --blastHitOriginSequenceLength = slength blastHit
            --minHfrom = minimum (map h_from blastMatches)
            --minHfromHSP = fromJust (find (\hsp -> minHfrom == h_from hsp) blastMatches)
            --maxHto = maximum (map h_to blastMatches)
            --maxHtoHSP = fromJust (find (\hsp -> maxHto == h_to hsp) blastMatches)
            --minHonQuery = q_from minHfromHSP
            --maxHonQuery = q_to maxHtoHSP
            --unsafe coordinates may exceed length of available sequence
            --unsafestartcoordinate = minHfrom - minHonQuery 
            --unsafeendcoordinate = maxHto + (queryLength - maxHonQuery) 
            --startcoordinate = lowerBoundryCoordinateSetter 0 unsafestartcoordinate
            --endcoordinate = upperBoundryCoordinateSetter blastHitOriginSequenceLength unsafeendcoordinate 
            --strand = "1"

lowerBoundryCoordinateSetter :: Int -> Int -> Int
lowerBoundryCoordinateSetter lowerBoundry currentValue
  | currentValue < lowerBoundry = lowerBoundry
  | otherwise = currentValue

upperBoundryCoordinateSetter :: Int -> Int -> Int
upperBoundryCoordinateSetter upperBoundry currentValue
  | currentValue > upperBoundry = upperBoundry
  | otherwise = currentValue

getReverseRequestedSequenceElement :: Int -> (BlastHit,Int) -> (String,Int,Int,String,T.Text,Int,L.ByteString)
getReverseRequestedSequenceElement queryLength (blastHit,taxid) = (geneIdentifier',startcoordinate,endcoordinate,strand,accession',taxid,subjectBlast)
   where   accession' = extractAccession blastHit
           subjectBlast = unSL (subject blastHit)        
           geneIdentifier' = extractGeneId blastHit
           blastMatch = head (matches blastHit)
           blastHitOriginSequenceLength = slength blastHit               
           maxHfrom = h_from blastMatch      
           minHto = h_to blastMatch
           minHonQuery = q_from blastMatch
           maxHonQuery = q_to blastMatch
           --unsafe coordinates may exceed length of avialable sequence
           unsafestartcoordinate = maxHfrom + minHonQuery 
           unsafeendcoordinate = minHto - (queryLength - maxHonQuery) 
           startcoordinate = lowerBoundryCoordinateSetter 0 unsafeendcoordinate 
           endcoordinate = upperBoundryCoordinateSetter blastHitOriginSequenceLength unsafestartcoordinate 
           strand = "2"
           --
           --blastMatches = matches blastHit
           --blastHitOriginSequenceLength = slength blastHit               
           --maxHfrom = maximum (map h_from blastMatches)
           --maxHfromHSP = fromJust (find (\hsp -> maxHfrom == h_from hsp) blastMatches)     
           --minHto = minimum (map h_to blastMatches)
           --minHtoHSP = fromJust (find (\hsp -> minHto == h_to hsp) blastMatches)
           --minHonQuery = q_from maxHfromHSP
           --maxHonQuery = q_to minHtoHSP
           --unsafe coordinates may exeed length of avialable sequence
           --unsafestartcoordinate = maxHfrom + minHonQuery 
           --unsafeendcoordinate = minHto - (queryLength - maxHonQuery) 
           --startcoordinate = lowerBoundryCoordinateSetter 0 unsafeendcoordinate 
           --endcoordinate = upperBoundryCoordinateSetter blastHitOriginSequenceLength unsafestartcoordinate 
           --strand = "2"

--computeAlignmentSCIs :: [String] -> [String] -> IO ()
--computeAlignmentSCIs alignmentFilepaths rnazOutputFilepaths = do
--  let zippedFilepaths = zip alignmentFilepaths rnazOutputFilepaths
--  mapM_ systemRNAz zippedFilepaths  

alignSequences :: String -> String -> [String] -> [String] -> [String] -> [String] -> IO ()
alignSequences program' options fastaFilepaths fastaFilepaths2 alignmentFilepaths summaryFilepaths = do
  let zipped4Filepaths = zip4 fastaFilepaths fastaFilepaths2 alignmentFilepaths summaryFilepaths
  let zipped3Filepaths = zip3 fastaFilepaths alignmentFilepaths summaryFilepaths 
  let zippedFilepaths = zip fastaFilepaths alignmentFilepaths
  let timeout = "3600"
  case program' of
    "locarna" -> mapM_ (systemlocarna options) zipped4Filepaths
    "mlocarna" -> mapM_ (systemMlocarna options) zippedFilepaths
    "mlocarnatimeout" -> mapM_ (systemMlocarnaWithTimeout timeout options) zippedFilepaths
    "clustalo" -> mapM_ (systemClustalo options) zippedFilepaths
    _ -> mapM_ (systemClustalw2 options ) zipped3Filepaths

constructFastaFilePaths :: String -> (Int, Sequence) -> String
constructFastaFilePaths currentDirectory (fastaIdentifier, _) = currentDirectory ++ show fastaIdentifier ++".fa"

constructCMsearchFilePaths :: String -> (Int, Sequence) -> String
constructCMsearchFilePaths currentDirectory (fastaIdentifier, _) = currentDirectory ++ show fastaIdentifier ++".cmsearch"
                                                                                  
-- Smaller e-Values are greater, the maximum function is applied
compareHitEValue :: (BlastHit,Int) -> (BlastHit,Int) -> Ordering                    
compareHitEValue (hit1,_) (hit2,_)
  | hitEValue hit1 > hitEValue hit2 = LT
  | hitEValue hit1 < hitEValue hit2 = GT
  -- in case of equal evalues the first hit is selected
  | hitEValue hit1 == hitEValue hit2 = GT                                           
-- comparing (hitEValue . Down . fst)
compareHitEValue (_,_) (_,_) = EQ 

compareTaxId :: (BlastHit,Int) -> (BlastHit,Int) -> Ordering            
compareTaxId (_,taxId1) (_,taxId2)
  | taxId1 > taxId2 = LT
  | taxId1 < taxId2 = GT
  -- in case of equal evalues the first hit is selected
  | taxId1 == taxId2 = EQ
compareTaxId (_,_)  (_,_) = EQ
                       
sameTaxId :: (BlastHit,Int) -> (BlastHit,Int) -> Bool
sameTaxId (_,taxId1) (_,taxId2) = taxId1 == taxId2

-- | NCBI uses the e-Value of the best HSP as the Hits e-Value
hitEValue :: BlastHit -> Double
hitEValue hit = minimum (map e_val (matches hit))
                          
convertFastaFoldStockholm :: Sequence -> String -> String
convertFastaFoldStockholm fastasequence foldedStructure = stockholmOutput
  where alnHeader = "# STOCKHOLM 1.0\n\n"
        --(L.unpack (unSL (seqheader inputFasta')))) ++ "\n" ++ (map toUpper (L.unpack (unSD (seqdata inputFasta')))) ++ "\n"
        seqIdentifier = L.unpack (unSL (seqheader fastasequence))
        seqSequence = L.unpack (unSD (seqdata fastasequence))
        identifierLength = length seqIdentifier
        spacerLength' = maximum [14,identifierLength + 2]
        spacer = replicate (spacerLength' - identifierLength) ' '
        entrystring = seqIdentifier ++ spacer ++ seqSequence ++ "\n"
        structureString = "#=GC SS_cons" ++ replicate (spacerLength' - 12) ' ' ++ foldedStructure ++ "\n"
        bottom = "//"
        stockholmOutput = alnHeader ++ entrystring ++ structureString ++ bottom
                   
convertClustaltoStockholm :: StructuralClustalAlignment -> String
convertClustaltoStockholm parsedMlocarnaAlignment = stockholmOutput
  where alnHeader = "# STOCKHOLM 1.0\n\n"
        clustalAlignment = structuralAlignmentEntries parsedMlocarnaAlignment
        uniqueIds = nub (map entrySequenceIdentifier clustalAlignment)
        mergedEntries = map (mergeEntry clustalAlignment) uniqueIds
        maxIdentifierLenght = maximum (map (length . entrySequenceIdentifier) clustalAlignment)
        spacerLength' = maxIdentifierLenght + 2
        stockholmEntries = concatMap (buildStockholmAlignmentEntries spacerLength') mergedEntries
        structureString = "#=GC SS_cons" ++ replicate (spacerLength' - 12) ' ' ++ secondaryStructureTrack parsedMlocarnaAlignment ++ "\n"
        bottom = "//"
        stockholmOutput = alnHeader ++ stockholmEntries ++ structureString ++ bottom

mergeEntry :: [ClustalAlignmentEntry] -> String -> ClustalAlignmentEntry
mergeEntry clustalAlignment uniqueId = mergedEntry
  where idEntries = filter (\entry -> entrySequenceIdentifier entry==uniqueId) clustalAlignment
        mergedSeq = foldr ((++) . entryAlignedSequence) "" idEntries
        mergedEntry = ClustalAlignmentEntry uniqueId mergedSeq

buildStockholmAlignmentEntries :: Int -> ClustalAlignmentEntry -> String
buildStockholmAlignmentEntries inputSpacerLength entry = entrystring
  where idLength = length (filter (/= '\n') (entrySequenceIdentifier entry))
        spacer = replicate (inputSpacerLength - idLength) ' '
        entrystring = entrySequenceIdentifier entry ++ spacer ++ entryAlignedSequence entry ++ "\n"

retrieveTaxonomicContextEntrez :: Int -> IO (Maybe Taxon)
retrieveTaxonomicContextEntrez inputTaxId = do
       let program' = Just "efetch"
       let database' = Just "taxonomy"
       let taxIdString = show inputTaxId
       let registrationInfo = buildRegistration "RNAlien" "florian.eggenhofer@univie.ac.at"
       let queryString = "id=" ++ taxIdString ++ registrationInfo
       let entrezQuery = EntrezHTTPQuery program' database' queryString 
       result <- entrezHTTP entrezQuery
       if null result
          then do
            error "Could not retrieve taxonomic context from NCBI Entrez, cannot proceed."
            return Nothing
          else do
            let taxon = head (readEntrezTaxonSet result)
            --print taxon
            if null (lineageEx taxon)
              then error "Retrieved taxonomic context taxon from NCBI Entrez with empty lineage, cannot proceed."
              else CE.evaluate (Just taxon)

retrieveParentTaxIdEntrez :: [(BlastHit,Int)] -> IO [(BlastHit,Int)]
retrieveParentTaxIdEntrez blastHitsWithHitTaxids = do
  if not (null blastHitsWithHitTaxids)
     then do
       let program' = Just "efetch"
       let database' = Just "taxonomy"
       let extractedBlastHits = map fst blastHitsWithHitTaxids
       let taxIds = map snd blastHitsWithHitTaxids
       let taxIdStrings = map show taxIds
       let taxIdQuery = intercalate "," taxIdStrings
       let registrationInfo = buildRegistration "RNAlien" "florian.eggenhofer@univie.ac.at"
       let queryString = "id=" ++ taxIdQuery ++ registrationInfo
       let entrezQuery = EntrezHTTPQuery program' database' queryString 
       result <- entrezHTTP entrezQuery
       let parentTaxIds = readEntrezParentIds result
       if null parentTaxIds
         then return []
         else CE.evaluate (zip extractedBlastHits parentTaxIds)
    else return []

-- | Wrapper functions that ensures that only 20 queries are sent per request
retrieveParentTaxIdsEntrez :: [(BlastHit,Int)] -> IO [(BlastHit,Int)]
retrieveParentTaxIdsEntrez taxIdwithBlastHits = do
  let splits = portionListElements taxIdwithBlastHits 20
  taxIdsOutput <- mapM retrieveParentTaxIdEntrez splits
  return (concat taxIdsOutput)

-- | Wrapper functions that ensures that only 20 queries are sent per request
retrieveBlastHitsTaxIdEntrez :: [BlastHit] -> IO [([BlastHit],String)]
retrieveBlastHitsTaxIdEntrez blastHits = do
  let splits = portionListElements blastHits 20
  mapM retrieveBlastHitTaxIdEntrez splits
  

retrieveBlastHitTaxIdEntrez :: [BlastHit] -> IO ([BlastHit],String)
retrieveBlastHitTaxIdEntrez blastHits = do
  if not (null blastHits)
     then do
       let geneIds = map extractGeneId blastHits
       let idList = intercalate "," geneIds
       let registrationInfo = buildRegistration "RNAlien" "florian.eggenhofer@univie.ac.at"
       let query' = "id=" ++ idList ++ registrationInfo
       let entrezQuery = EntrezHTTPQuery (Just "esummary") (Just "nucleotide") query'
       threadDelay 10000000                  
       result <- entrezHTTP entrezQuery
       CE.evaluate (blastHits,result)
     else return (blastHits,"")

extractTaxIdFromEntrySummaries :: String -> [Int]
extractTaxIdFromEntrySummaries input
  | null input = []
  | null parsedResultList = []
  | otherwise = hitTaxIds
  where parsedResultList = readEntrezSummaries input
        parsedResult = head parsedResultList
        blastHitSummaries = documentSummaries parsedResult
        hitTaxIdStrings = map extractTaxIdfromDocumentSummary blastHitSummaries
        hitTaxIds = map readInt hitTaxIdStrings

extractAccession :: BlastHit -> T.Text
extractAccession currentBlastHit = accession'
  where splitedFields = T.splitOn (T.pack "|") (DTE.decodeUtf8 (L.toStrict (hitId currentBlastHit)))
        accession' =  splitedFields !! 3
        
extractGeneId :: BlastHit -> String
extractGeneId currentBlastHit = nucleotideId
  where truncatedId = drop 3 (L.unpack (hitId currentBlastHit))
        pipeSymbolIndex = fromJust (elemIndex '|' truncatedId)
        nucleotideId = take pipeSymbolIndex truncatedId

extractTaxIdfromDocumentSummary :: EntrezDocSum -> String
extractTaxIdfromDocumentSummary documentSummary = itemContent (fromJust (find (\item -> "TaxId" == itemName item) (summaryItems documentSummary)))

getBestHit :: BlastResult -> BlastHit
getBestHit blastResult 
  | null (concatMap hits (results blastResult)) = error "getBestHit - head: empty list"
  | otherwise = head (hits (head (results blastResult)))

-- Blast returns low evalues with zero instead of the exact number
getHitWithFractionEvalue :: BlastResult -> Maybe BlastMatch
getHitWithFractionEvalue blastResult 
  | null (concatMap hits (results blastResult)) = Nothing
  | otherwise = find (\match -> e_val match /= (0 ::Double)) (concatMap matches (concatMap hits (results blastResult)))

showlines :: Show a => [a] -> String
showlines = concatMap (\x -> show x ++ "\n") 

logMessage :: String -> String -> IO ()
logMessage logoutput temporaryDirectoryPath = appendFile (temporaryDirectoryPath ++ "Log") logoutput

logWarning :: String -> String -> IO ()
logWarning logoutput temporaryDirectoryPath = appendFile (temporaryDirectoryPath ++ "log/warnings") logoutput

logVerboseMessage :: Bool -> String -> String -> IO ()
logVerboseMessage verboseTrue logoutput temporaryDirectoryPath 
  | verboseTrue = appendFile (temporaryDirectoryPath ++ "Log") (show logoutput)
  | otherwise = return ()
                  
logEither :: (Show a) => Either a b -> String -> IO ()
logEither (Left logoutput) temporaryDirectoryPath = appendFile (temporaryDirectoryPath ++ "Log") (show logoutput)
logEither  _ _ = return ()

checkTools :: [String] -> String -> String -> IO (Either String String)
checkTools tools temporaryDirectoryPath selectedQuerySelectionMethod = do
  -- if queryselectionmethod is set to clustering then also check for clustal omega
  let additionaltools = if selectedQuerySelectionMethod == "clustering" then tools ++ ["clustalo"] else tools
  -- check if all tools are available via PATH or Left
  checks <- mapM checkTool additionaltools
  if not (null (lefts checks))
    then return (Left (concat (lefts checks)))
    else do  
      logMessage ("Tools : " ++ intercalate "," tools ++ "\n") temporaryDirectoryPath
      return (Right "Tools ok")

logToolVersions :: String -> IO ()
logToolVersions temporaryDirectoryPath = do
  let clustaloversionpath = temporaryDirectoryPath ++ "log/clustalo.version"
  let mlocarnaversionpath = temporaryDirectoryPath ++ "log/mlocarna.version"
  let rnafoldversionpath = temporaryDirectoryPath ++ "log/RNAfold.version"
  let infernalversionpath = temporaryDirectoryPath ++ "log/Infernal.version"
  _ <- system ("clustalo --version >" ++ clustaloversionpath)
  _ <- system ("mlocarna --version >" ++ mlocarnaversionpath)
  _ <- system ("RNAfold --version >" ++ rnafoldversionpath)
  _ <- system ("cmcalibrate -h >" ++ infernalversionpath)  
  -- _ <- system ("RNAz" ++ rnazversionpath)
  -- _ <- system ("CMCompare >" ++ infernalversionpath)
  clustaloversion <- readFile clustaloversionpath
  mlocarnaversion <- readFile mlocarnaversionpath
  rnafoldversion <- readFile rnafoldversionpath 
  infernalversionOutput <- readFile infernalversionpath
  let infernalversion = lines infernalversionOutput !! 1
  let messageString = "Clustalo version: " ++ clustaloversion ++ "mlocarna version: " ++ mlocarnaversion  ++ "RNAfold version: " ++ rnafoldversion  ++ "infernalversion: " ++ infernalversion ++ "\n"
  logMessage messageString temporaryDirectoryPath

checkTool :: String -> IO (Either String String)
checkTool tool = do
  toolcheck <- findExecutable tool
  if isJust toolcheck
    then return (Right (fromJust toolcheck))
    else return (Left ("RNAlien could not find "++ tool ++ " in your $PATH and has to abort.\n"))
  
constructTaxonomyRecordsCSVTable :: ModelConstruction -> String
constructTaxonomyRecordsCSVTable modelconstruction = csvtable
  where tableheader = "Taxonomy Id;Added in Iteration Step;Entry Header\n"
        tablebody = concatMap constructTaxonomyRecordCSVEntries (taxRecords modelconstruction)
        csvtable = tableheader ++ tablebody

constructTaxonomyRecordCSVEntries :: TaxonomyRecord -> String
constructTaxonomyRecordCSVEntries taxRecord = concatMap (constructTaxonomyRecordCSVEntry taxIdString) (sequenceRecords taxRecord)
  where taxIdString = show (recordTaxonomyId taxRecord)

constructTaxonomyRecordCSVEntry :: String -> SequenceRecord -> String
constructTaxonomyRecordCSVEntry taxIdString seqrec = taxIdString ++ ";" ++ show (aligned seqrec) ++ ";" ++ filter checkTaxonomyRecordCSVChar (L.unpack (unSL (seqheader (nucleotideSequence seqrec)))) ++ "\n"

checkTaxonomyRecordCSVChar :: Char -> Bool
checkTaxonomyRecordCSVChar c
  | c == '"' = False
  | c == ';' = False
  | otherwise = True

setVerbose :: Verbosity -> Bool
setVerbose verbosityLevel
  | verbosityLevel == Loud = True
  | otherwise = False

evaluateConstructionResult :: StaticOptions -> ModelConstruction -> IO String
evaluateConstructionResult staticOptions mCResult = do
  let evaluationDirectoryFilepath = tempDirPath staticOptions ++ "evaluation/"
  createDirectoryIfMissing False evaluationDirectoryFilepath
  let fastaFilepath = tempDirPath staticOptions ++ "result.fa"
  let clustalFilepath = evaluationDirectoryFilepath ++ "result.clustal"
  let reformatedClustalPath = evaluationDirectoryFilepath ++ "result.clustal.reformated"
  let cmFilepath = tempDirPath staticOptions ++ "result.cm"
  let resultSequences = (inputFasta mCResult):map nucleotideSequence (concatMap sequenceRecords (taxRecords mCResult))
  let resultNumber = length resultSequences + 1 
  let rnaCentralQueries = map buildSequenceViaMD5Query resultSequences    
  rnaCentralEntries <- getRNACentralEntries rnaCentralQueries
  let rnaCentralEvaluationResult = showRNAcentralAlienEvaluation rnaCentralEntries
  writeFile (tempDirPath staticOptions ++ "result.rnacentral") rnaCentralEvaluationResult
  systemCMalign ("--outformat=Clustal --cpu " ++ show (cpuThreads staticOptions)) cmFilepath fastaFilepath clustalFilepath
  let resultModelStatistics = tempDirPath staticOptions ++ "result.cmstat"
  systemCMstat cmFilepath resultModelStatistics
  inputcmStat <- readCMstat resultModelStatistics
  let cmstatString = cmstatEvalOutput inputcmStat
  if resultNumber > 1
    then do 
      let resultRNAz = tempDirPath staticOptions ++ "result.rnaz"
      let resultRNAcode = tempDirPath staticOptions ++ "result.rnacode"
      rnazClustalpath <- preprocessClustalForRNAcodeExternal clustalFilepath reformatedClustalPath
      if isRight rnazClustalpath
        then do
          systemRNAz "-l" (fromRight rnazClustalpath) resultRNAz 
          inputRNAz <- readRNAz resultRNAz
          let rnaZString = rnaZEvalOutput inputRNAz
          RC.systemRNAcode " -t " (fromRight rnazClustalpath) resultRNAcode
          inputRNAcode <- RC.readRNAcodeTabular resultRNAcode
          let rnaCodeString = rnaCodeEvalOutput inputRNAcode
          return ("\nEvaluation of RNAlien result :\nCMstat statistics for result.cm\n" ++ cmstatString ++ "\nRNAz statistics for result alignment: " ++ rnaZString ++ "\nRNAcode output for result alignment:\n" ++ rnaCodeString ++ "\nSequences found by RNAlien with RNAcentral entry:\n" ++ rnaCentralEvaluationResult)
        else do
          logWarning ("Running RNAz for result evalution encountered a problem:" ++ fromLeft rnazClustalpath) (tempDirPath staticOptions) 
          return ("\nEvaluation of RNAlien result :\nCMstat statistics for result.cm\n" ++ cmstatString ++ "\nRNAz statistics for result alignment: Running RNAz for result evalution encountered a problem\n" ++ fromLeft rnazClustalpath ++ "\n" ++ "Sequences found by RNAlien with RNAcentral entry:\n" ++ rnaCentralEvaluationResult)
    else do
      logWarning "Message: RNAlien could not find additional covariant sequences\n Could not run RNAz statistics. Could not run RNAz statistics with a single sequence.\n" (tempDirPath staticOptions) 
      return ("\nEvaluation of RNAlien result :\nCMstat statistics for result.cm\n" ++ cmstatString ++ "\nRNAlien could not find additional covariant sequences. Could not run RNAz statistics with a single sequence.\n\nSequences found by RNAlien with RNAcentral entry:\n" ++ rnaCentralEvaluationResult)


cmstatEvalOutput :: Either ParseError CMstat -> String 
cmstatEvalOutput inputcmstat
  | isRight inputcmstat = cmstatString
  | otherwise = show (fromLeft inputcmstat)
    where cmStat = fromRight inputcmstat  
          cmstatString = "  Sequence Number: " ++ show (statSequenceNumber cmStat)++ "\n" ++ "  Effective Sequences: " ++ show (statEffectiveSequences cmStat)++ "\n" ++ "  Consensus length: " ++ show (statConsensusLength cmStat) ++ "\n" ++ "  Expected maximum hit-length: " ++ show (statW cmStat) ++ "\n" ++ "  Basepairs: " ++ show (statBasepairs cmStat)++ "\n" ++ "  Bifurcations: " ++ show (statBifurcations cmStat) ++ "\n" ++ "  Modeltype: " ++ show (statModel cmStat) ++ "\n" ++ "  Relative Entropy CM: " ++ show (relativeEntropyCM cmStat) ++ "\n" ++ "  Relative Entropy HMM: " ++ show (relativeEntropyHMM cmStat) ++ "\n"

rnaZEvalOutput :: Either ParseError RNAz -> String 
rnaZEvalOutput inputRNAz 
  | isRight inputRNAz = rnazString
  | otherwise = show (fromLeft inputRNAz)
    where rnaZ = fromRight inputRNAz
          rnazString = "  Mean pairwise identity: " ++ show (meanPairwiseIdentity rnaZ) ++ "\n  Shannon entropy: " ++ show (shannonEntropy rnaZ) ++  "\n  GC content: " ++ show (gcContent rnaZ) ++ "\n  Mean single sequence minimum free energy: " ++ show (meanSingleSequenceMinimumFreeEnergy rnaZ) ++ "\n  Consensus minimum free energy: " ++ show (consensusMinimumFreeEnergy rnaZ) ++ "\n  Energy contribution: " ++ show (energyContribution rnaZ) ++ "\n  Covariance contribution: " ++ show (covarianceContribution rnaZ) ++ "\n  Combinations pair: " ++ show (combinationsPair rnaZ) ++ "\n  Mean z-score: " ++ show (meanZScore rnaZ) ++ "\n  Structure conservation index: " ++ show (structureConservationIndex rnaZ) ++ "\n  Background model: " ++ backgroundModel rnaZ ++ "\n  Decision model: " ++ decisionModel rnaZ ++ "\n  SVM decision value: " ++ show (svmDecisionValue rnaZ) ++ "\n  SVM class propability: " ++ show (svmRNAClassProbability rnaZ) ++ "\n  Prediction: " ++ prediction rnaZ

rnaCodeEvalOutput :: Either ParseError RC.RNAcode -> String 
rnaCodeEvalOutput inputRNAcode 
  | isRight inputRNAcode = rnaCodeString
  | otherwise = show (fromLeft inputRNAcode)
    where rnaCode = fromRight inputRNAcode
          rnaCodeString = "HSS\tFrame\tLength\tFrom\tTo\tName\tStart\tEnd\tScore\tP\n" ++ rnaCodeEntries
          rnaCodeEntries = concatMap showRNACodeHits (RC.rnacodeHits rnaCode)

showRNACodeHits :: RC.RNAcodeHit -> String
showRNACodeHits rnacodeHit = show (RC.hss rnacodeHit) ++ "\t" ++ show (RC.frame rnacodeHit) ++ "\t" ++ show (RC.hitLength rnacodeHit) ++ "\t"++ show (RC.from rnacodeHit) ++ "\t" ++ show (RC.to rnacodeHit) ++ "\t" ++ (RC.name rnacodeHit) ++ "\t" ++ show (RC.start rnacodeHit) ++ "\t" ++ show (RC.end rnacodeHit) ++ "\t" ++ show (RC.score rnacodeHit) ++ show (RC.pvalue rnacodeHit) ++ "\n"

-- | Call for external preprocessClustalForRNAz
preprocessClustalForRNAzExternal :: String -> String -> IO (Either String String)
preprocessClustalForRNAzExternal clustalFilepath reformatedClustalPath = do
  clustalText <- TI.readFile clustalFilepath
  --change clustal format for rnazSelectSeqs.pl
  let reformatedClustalText = T.map reformatAln clustalText
  TI.writeFile reformatedClustalPath reformatedClustalText
  --select representative entries from result.Clustal with select_sequences
  let selectedClustalpath = clustalFilepath ++ ".selected"
  system ("rnazSelectSeqs.pl " ++ reformatedClustalPath ++ " >" ++ selectedClustalpath)
  return (Right selectedClustalpath)

-- | Call for external preprocessClustalForRNAcode - RNAcode additionally to RNAz requirements does not accept pipe,underscore, doublepoint symbols
preprocessClustalForRNAcodeExternal :: String -> String -> IO (Either String String)
preprocessClustalForRNAcodeExternal clustalFilepath reformatedClustalPath = do
  clustalText <- TI.readFile clustalFilepath
  --change clustal format for rnazSelectSeqs.pl
  let clustalTextLines = T.lines clustalText
  let headerClustalTextLines = T.unlines (take 2 clustalTextLines)
  let headerlessClustalTextLines = T.unlines (drop 2 clustalTextLines)
  let reformatedClustalText = T.map reformatRNACodeAln headerlessClustalTextLines
  TI.writeFile reformatedClustalPath (headerClustalTextLines `T.append` (T.singleton '\n') `T.append` reformatedClustalText)
  --select representative entries from result.Clustal with select_sequences
  let selectedClustalpath = clustalFilepath ++ ".selected"
  system ("rnazSelectSeqs.pl " ++ reformatedClustalPath ++ " >" ++ selectedClustalpath)
  return (Right selectedClustalpath)

-- | RNAz can process 500 sequences at max. Using rnazSelectSeqs to isolate representative sample. rnazSelectSeqs only accepts - gap characters, alignment is reformatted accordingly.
preprocessClustalForRNAz :: String -> String -> IO (Either String String)
preprocessClustalForRNAz clustalFilepath reformatedClustalPath = do
  clustalText <- TI.readFile clustalFilepath
  let clustalTextLines = T.lines clustalText
  if length clustalTextLines > 500
    then do 
      --change clustal format for rnazSelectSeqs.pl
      let reformatedClustalString = T.map reformatAln clustalText
      TI.writeFile reformatedClustalPath reformatedClustalString
      --select representative entries from result.Clustal with select_sequences
      let selectedClustalpath = clustalFilepath ++ ".selected"
      parsedClustalInput <- readClustalAlignment clustalFilepath
      if isRight parsedClustalInput
        then do
          let filteredClustalInput = rnaZSelectSeqs (fromRight parsedClustalInput) 500 99
          writeFile selectedClustalpath (show filteredClustalInput)
          return (Right selectedClustalpath)
        else return (Left (show (fromLeft parsedClustalInput)))
    else return (Right clustalFilepath)


-- Iteratively removes sequences with decreasing similarity until target number of alignment entries is reached.
rnaZSelectSeqs :: ClustalAlignment -> Int -> Double -> ClustalAlignment
rnaZSelectSeqs currentClustalAlignment targetEntries identityCutoff
  | targetEntries < numberOfEntries = rnaZSelectSeqs filteredAlignment targetEntries (identityCutoff - 1)
  | otherwise = currentClustalAlignment
  where numberOfEntries =  length (alignmentEntries currentClustalAlignment) 
        filteredEntries = filterIdenticalAlignmentEntry (alignmentEntries currentClustalAlignment) identityCutoff 
        filteredAlignment = ClustalAlignment filteredEntries (conservationTrack currentClustalAlignment)

reformatRNACodeAln :: Char -> Char 
reformatRNACodeAln c
  | c == ':' = '-'
  | c == '|' = '-'
  | c == '.' = '-'
  | c == '~' = '-'
  | c == '_' = '-'
  | c == 'u' = 'U'
  | c == 't' = 'T'
  | c == 'g' = 'G'
  | c == 'c' = 'C'
  | c == 'a' = 'A'
  | otherwise = c

reformatAln :: Char -> Char 
reformatAln c
  | c == '.' = '-'
  | c == '~' = '-'
  | c == '_' = '-'
  | c == 'u' = 'U'
  | c == 't' = 'T'
  | c == 'g' = 'G'
  | c == 'c' = 'C'
  | c == 'a' = 'A'
  | otherwise = c

-- | Check if alien can connect to NCBI
checkNCBIConnection :: IO (Either String String)
checkNCBIConnection = do
   response <- simpleHTTP (getRequest "http://www.ncbi.nlm.nih.gov")
   if isRight response
     then do
       let rightResponse = fromRight response
       if rspCode rightResponse == (2,0,0)
         then return (Right ("Network connection with NCBI server is ok: "  ++ show (rspCode rightResponse)))
         else return (Left ("Could not connect to NCBI server \"http://www.ncbi.nlm.nih.gov\". Response Code: " ++ show (rspCode rightResponse)))
     else return (Left ("Could not connect to NCBI server: \"http://www.ncbi.nlm.nih.gov\": " ++ show (fromLeft response)))

-- | Blast evalue is set stricter in inital alignment mode
setBlastExpectThreshold :: ModelConstruction -> Double
setBlastExpectThreshold modelConstruction
  | alignmentModeInfernal modelConstruction = 1 :: Double
  | otherwise = 0.1 :: Double

reformatFasta :: Sequence -> Sequence
reformatFasta input = Seq (seqheader input) updatedSequence Nothing
  where updatedSequence = SeqData (L.pack (map reformatFastaSequence (L.unpack (unSD (seqdata input)))))

reformatFastaSequence :: Char -> Char 
reformatFastaSequence c
  | c == '.' = '-'
  | c == '~' = '-'
  | c == '_' = '-'
  | c == 'u' = 'T'
  | c == 't' = 'T'
  | c == 'g' = 'G'
  | c == 'c' = 'C'
  | c == 'a' = 'A'
  | c == 'U' = 'T'
  | otherwise = c

setRestrictedTaxonomyLimits :: String -> (Maybe Int,Maybe Int)
setRestrictedTaxonomyLimits trestriction 
  | trestriction == "bacteria" = (Just (2 :: Int), Nothing)
  | trestriction == "archea" = (Just (2157 :: Int), Nothing)
  | trestriction == "eukaryia" = (Just (2759 :: Int), Nothing)
  | otherwise = (Nothing, Nothing)

checkTaxonomyRestriction :: Maybe String -> Maybe String
checkTaxonomyRestriction taxonomyRestriction
  | isJust taxonomyRestriction = checkTaxonomyRestrictionString (fromJust taxonomyRestriction)
  | otherwise = Nothing

checkTaxonomyRestrictionString :: String -> Maybe String
checkTaxonomyRestrictionString restrictionString
  | restrictionString == "archea" = Just "archea"
  | restrictionString == "bacteria" = Just "bacteria"
  | restrictionString == "eukaryia" = Just "eukaryia"
  | otherwise = Nothing

extractAlignmentSequencesByIds :: String -> [L.ByteString] -> IO [Sequence]
extractAlignmentSequencesByIds stockholmFilePath sequenceIds = do
  inputSeedAln <- TIO.readFile stockholmFilePath
  let alnEntries = extractAlignmentSequences inputSeedAln
  --let splitIds = map E.encodeUtf8 (TL.splitOn (TL.pack ",") (TL.pack sequenceIds))
  let filteredEntries = concatMap (filterSequencesById alnEntries) sequenceIds
  return filteredEntries
 
extractAlignmentSequences :: TL.Text -> [Sequence]
extractAlignmentSequences  seedFamilyAln = rfamIDAndseedFamilySequences
  where seedFamilyAlnLines = TL.lines seedFamilyAln
        -- remove empty lines from splitting
        seedFamilyNonEmpty =  filter (\alnline -> not (TL.empty == alnline)) seedFamilyAlnLines
        -- remove annotation and spacer lines
        seedFamilyIdSeqLines =  filter (\alnline -> ((not ((TL.head alnline) == '#'))) && (not ((TL.head alnline) == ' ')) && (not ((TL.head alnline) == '/'))) seedFamilyNonEmpty 
        -- put id and corresponding seq of each line into a list and remove whitspaces        
        seedFamilyIdandSeqLines = map TL.words seedFamilyIdSeqLines
        -- linewise tuples with id and seq without alinment characters - .
        seedFamilyIdandSeqLineTuples = map (\alnline -> ((head alnline),(filterAlnChars (last alnline)))) seedFamilyIdandSeqLines
        -- line tuples sorted by id
        seedFamilyIdandSeqTupleSorted = sortBy (\tuple1 tuple2 -> compare (fst tuple1) (fst tuple2)) seedFamilyIdandSeqLineTuples
        -- line tuples grouped by id
        seedFamilyIdandSeqTupleGroups = groupBy (\tuple1 tuple2 -> (fst tuple1) == (fst tuple2)) seedFamilyIdandSeqTupleSorted
        seedFamilySequences = map mergeIdSeqTuplestoSequence seedFamilyIdandSeqTupleGroups
        rfamIDAndseedFamilySequences = seedFamilySequences

filterSequencesById :: [Sequence] -> L.ByteString -> [Sequence]
filterSequencesById alignmentSequences sequenceId = filter (sequenceHasId sequenceId) alignmentSequences

sequenceHasId :: L.ByteString -> Sequence -> Bool
sequenceHasId sequenceId currentSequence = sequenceId == (unSL (seqid currentSequence))

filterAlnChars :: TL.Text -> TL.Text
filterAlnChars cs = TL.filter (\c -> (not (c == '-')) && (not (c == '.'))) cs

mergeIdSeqTuplestoSequence :: [(TL.Text,TL.Text)] -> Sequence
mergeIdSeqTuplestoSequence tuplelist = currentSequence
  where seqId = fst (head tuplelist)
        seqData = TL.concat (map snd tuplelist)
        currentSequence = Seq (SeqLabel (E.encodeUtf8 seqId)) (SeqData (E.encodeUtf8 seqData)) Nothing