{-# LANGUAGE MultiParamTypeClasses, FlexibleContexts, CPP #-}
{-|
Module      : Main
Description : Program for re-building the Advise-Me database offline.

Intended to be used seperately from the main software. It can do the following:

   - Remove double requests and merge those requests which provide inputs for
     subtasks of the same task.
   - Rerun the domain reasoner.
   - Rerun the student models.

-}
module Main (main) where

import qualified GHC.IO.Encoding as E


import Data.Time.Clock (UTCTime)
import Data.Time.Format (parseTimeM, defaultTimeLocale)
import Data.List (sortBy, nubBy, foldl', intercalate, isPrefixOf, nub)
import Data.Char (isSpace)
import Data.Function (on)
import Data.Monoid ((<>))
import Data.Maybe (fromJust, fromMaybe, isJust)
import Control.Monad (unless, when, forM_, forM)
import Control.Applicative ((<**>))
import Control.Arrow ((&&&))
import Database.HDBC.Sqlite3 (connectSqlite3, setBusyTimeout, Connection)
import qualified Data.Text as T
import qualified Data.Map as M
import qualified Control.Monad.State.Lazy as S
import qualified Options.Applicative as O
import qualified Ideas.Text.XML as XML
--import qualified Ideas.Text.XML.Interface as XML
import qualified Ideas.Text.UTF8 as WorkaroundUTF8
import qualified Database.HDBC as SQL
import qualified Data.ByteString.Char8 as Char8
import qualified Data.Text.Encoding as T ( decodeLatin1 )

import Ideas.Common.Id (showId)

import Util.NumberRange (NumberRange, range)
import Util.List (concatMapM, index)
import Util.String (split, replace, trim, splitOn, strToLower)
import Main.Tasks (tasks)
import Main.ParserCSV (readFileCSV)
import Service.AdviseMe (runDomainReasoner)
import Bayes.Script ( findEvidence, taskConcepts, aggregateConcepts )
import Bayes.Evidence (Evidence, evStates)
import Database.Data
import System.FilePath ( (</>) )

#ifdef XLSX
import Database.HumanAssessment ( readEvidence, tally', getSpreadsheet, getHumanMachineFiles, Tally(..), Match(..) )
#endif

-------------------------------------------------------------------------------
-- * Interface

data Arguments = Arguments
   { inDBs :: [FilePath]
   , outDB :: FilePath
   , appendFlag :: Bool
   , generatedEvidence :: Maybe FilePath
#ifdef XLSX
   , replaceEvidenceArg :: [FilePath]
#endif
   , taskFilterArg :: Maybe [String]
   , trimFlag :: Bool
   , collapseFlag :: Bool
   , fixNumbersFlag :: Bool
   , drFlag :: Bool
   , smFlag :: Bool
   , writeFinalModelsFlag :: Bool
#ifdef XLSX
   , writeHumanAssessmentTableFlag :: [FilePath]
#endif
   , fixEncodingFlag :: Bool
   , fixPepiteFlag :: Bool
   , anonymizeFlag :: Bool
   }


cli :: IO Arguments
cli = O.execParser $ O.info (args <**> infoArgs <**> O.helper) info

   where

   info :: O.InfoMod Arguments
   info = O.progDesc "Advise-Me database rebuilder"

   infoArgs =
      O.infoOption
         (  "Available tasks: " ++ intercalate ", " taskIDs )
         (  O.short 'l' <>
            O.long "list-tasks" <>
            O.help "List available task IDs"
         )

   args :: O.Parser Arguments
   args = Arguments
      <$>
         ( O.many . O.strOption $
            O.short 'i' <> O.long "input" <>
            O.help "Input database(s)"
         )
      <*>
         ( O.strOption $
            O.short 'o' <> O.long "output" <>
            O.help "Output database"
         )
      <*>
         ( O.switch $
            O.short 'a' <> O.long "append" <>
            O.help "Whether to overwrite or append to an existing table"
         )
      <*>
         ( O.optional . O.strOption $
            O.short 'g' <> O.long "generate" <>
            O.help "Generate simulated students from evidence for tasks in CSV files"
         )
#ifdef XLSX
      <*>
         ( O.many . O.strOption $
            O.long "replace-evidence" <>
            O.help "Replace evidence for existing students with evidence from human-assessed spreadsheet"
         )
#endif
      <*>
         ( fmap (fmap $ map trim . split ',') . O.optional . O.strOption $
            O.long "filter" <>
            O.help "Put requests in the given order; discard all others (expects comma-seperated list of task IDs)"
         )
      <*>
         ( O.switch $
            O.long "no-empty" <>
            O.help "Discard input that consist solely of whitespace"
         )
      <*>
         ( O.switch $
            O.long "collapse" <>
            O.help "Merge subtask requests and keep only final input"
         )
      <*>
         ( O.switch $
            O.long "fix-numbers" <>
            O.help "Update answers to tasks in which numbers have changed; this is a hack to test for regressions with old data"
         )
      <*>
         ( O.switch $
            O.long "evidence" <>
            O.help "Rerun the domain reasoner (implies --models)"
         )
      <*>
         ( O.switch $
            O.long "models" <>
            O.help "Rerun the task- and student models"
         )
      <*>
         ( O.switch $
            O.long "models-table" <>
            O.help "Add a table that details the final student models"
         )
#ifdef XLSX
      <*>
         ( O.many . O.strOption $
            O.long "human-assessment-table" <>
            O.help "Add a table detailing human assessments based on the given spreadsheet(s)"
         )
#endif
      <*>
         ( O.switch $
            O.long "fix-encoding" <>
            O.help "Fix encoding issues: transforms input in ISO-8859-1 to UTF-8"
         )
      <*>
         ( O.switch $
            O.long "fix-pepite" <>
            O.help "Fix incorrect Pépite requests"
         )
      <*>
         ( O.switch $
            O.long "anonymize" <>
            O.help "Anonymize student data"
         )

main :: IO ()
main = do
   E.setLocaleEncoding E.utf8
   arg <- cli

   let filterFlag = isJust . taskFilterArg
       taskFilter = fromMaybe [] $ taskFilterArg arg

   inConns <- mapM connectSqlite3 (inDBs arg)

   -- Test if a students table already exist
   (anyStudentsTable, allStudentsTable) <- do
      hasTable <- map ("students" `elem`) <$> mapM SQL.getTables inConns
      return (or hasTable, and hasTable)

   -- Check for nonsensical commands
   unless (smFlag arg `implies` (allStudentsTable || drFlag arg)) $
      error "Cannot run student modeller without evidence. Either run --evidence or make sure your input databases already contain students tables."

   unless ((filterFlag arg) `implies`
          (not anyStudentsTable || smFlag arg)) $
      error "Your flags imply that requests may be changed, but your database contains a students table. Also run --models to make sure the models stay consistent."

   forM_ taskFilter $ \tID ->
      when (not $ tID `elem` taskIDs) $
         error $ "Task ID \"" ++ tID ++ "\" is not recognized."

   -- Determine the order in which the requests are sent
   let sortRequests
        | filterFlag arg  = return . sortBy
                            (\x y -> compareFilter taskFilter x y
                                  <> compareTimes x y)
        | otherwise       = return . sortBy compareAppearance


   -- Obtain requests from all databases and perform operations
   processedRecords <- concatMapM allRecords inConns
      >>= task "fix encoding"
         fixEncoding (fixEncodingFlag arg)
      >>= task "filling in missing information"
         decorateRequests True
      >>= task "removing rectanglearea_old"
         (return . filter (\r -> r ! "taskid" /= "rectanglearea_old")) True
      >>= task "filtering tasks"
         (filterTasks taskFilter) (filterFlag arg)
      >>= task "Anonymizing data"
         anonymize (anonymizeFlag arg)
      >>= task "fix Pépite"
         fixPepite (fixPepiteFlag arg)
      >>= task "collapsing multiple requests for same task"
         collapseRequests (collapseFlag arg)
      >>= task "trimming empty inputs from requests"
         trimEmptyRequests (trimFlag arg)
      >>= task "changing numbers in old task answers"
         changeNumbersInOldRequests (fixNumbersFlag arg)
      >>= task "determining request order"
         sortRequests True
      >>= task "domain reasoner"
         domainReasoner (drFlag arg)
#ifdef XLSX
      >>= task "replacing evidence, drawing from human assessments"
         (replaceEvidence $ replaceEvidenceArg arg) (length (replaceEvidenceArg arg) > 0)
#endif
      >>= task "student modeller"
         studentModeller (smFlag arg || drFlag arg)

   -- Obtain generated evidence
   generatedRecords <- maybe (return []) simulateStudents (generatedEvidence arg)

   -- Combine both
   records <-
      task "numbering requests"
         (return . addRequestNumbers) True (processedRecords ++ generatedRecords)

   -- Output connection
   conn <- connectSqlite3 (outDB arg)
   setBusyTimeout conn 200

   -- Always write requests table
   unless (appendFlag arg) $ dropTable conn requestsTable
   createTable conn requestsTable
   insertRecords conn requestsTable records

   -- Write students table if we have enough information
   when (allStudentsTable || drFlag arg || null records) $ do
      unless (appendFlag arg) $ dropTable conn studentsTable
      createTable conn studentsTable
      insertRecords conn studentsTable records

   -- Write final models table
   when (writeFinalModelsFlag arg) $
      addModelsTable conn records

#ifdef XLSX
   -- Write final models table
   let humanAssessments = writeHumanAssessmentTableFlag arg
   let allObserved = foldl (\m r -> M.insertWith mappend (r!"studentid",r!"taskid") (r ! "evidence") m) mempty records
   when (not $ null humanAssessments) $ do
      tallies <- forM humanAssessments $ \path -> do
         xlsx <- getSpreadsheet path
         tally' xlsx allObserved
      writeHumanAssessmentTable conn . mconcat . concat . map M.elems $ tallies
#endif

   -- Finalize
   SQL.commit conn
   SQL.disconnect conn
   mapM_ SQL.disconnect inConns


-- | Auxiliary: List all possible task IDs.
taskIDs :: [String]
taskIDs = map showId tasks


-- | Auxiliary: Logical material implication.
implies :: Bool -> Bool -> Bool
x `implies` y = not x || y


-- | Since there is no way I can see to uniquely identify a record logged by
-- Ideas, we add request numbers in the following way. This should happen last,
-- so that we can be certain that the request numbers really correspond to the
-- correct requests. (In fact, identifying by row ID's will also work since
-- they should correspond one to one in the live database.)
addRequestNumbers :: [SqlRecord] -> [SqlRecord]
addRequestNumbers = zipWith (insert "requestnr") [1 :: Int ..]



-------------------------------------------------------------------------------
-- * Sorting functions

-- | Compare records by the index of their task ID w.r.t. the given list.
compareFilter :: [TaskID] -> SqlRecord -> SqlRecord -> Ordering
compareFilter taskFilter = compare `on` (index taskFilter . (! "taskid"))


-- | Compare records by their logged time.
compareTimes :: SqlRecord -> SqlRecord -> Ordering
compareTimes = compare `on` ((! "time") :: SqlRecord -> UTCTime)


-- | Compare records by their order of appearance in the original database.
compareAppearance :: SqlRecord -> SqlRecord -> Ordering
compareAppearance = compare `on` ((! "rowid") :: SqlRecord -> Integer)



-------------------------------------------------------------------------------
-- * Processing steps
--
-- Note, in general, that due to laziness the logging messages might not
-- correspond to actual running time. Perhaps a proper debugging logger would
-- be helpful.


-- | Decorate processing function with information and possibly skip the
-- function.
task :: String
     -> ([SqlRecord] -> IO [SqlRecord])
     -> Bool
     -> ([SqlRecord] -> IO [SqlRecord])
task report function True rows = do
   putStrLn $ "Running: " ++ report ++ "…"
   function rows
task report _ False rows = do
   putStrLn $ "Skipping: " ++ report ++ "."
   return rows


-- | Discard records for all requests for tasks that are not given as an
-- argument.
filterTasks :: [String] -> [SqlRecord] -> IO [SqlRecord]
filterTasks tIDs = return .  filter (\r -> r ! "taskid" `elem` tIDs)


-- | There are three seperate issues that have affected the encoding of pilot
-- data:
--
-- 1. When parsing XML, some HTML entities get transformed to their unescaped
-- Unicode character counterpart.
-- 2. When parsing XML, Unicode characters wreak havoc.
-- 3. When writing XML, it is encoded as ISO-8859-1 rather than UTF-8. This
-- causes issues since data written to the Sqlite database is assumed to be
-- UTF-8.
--
-- See issue #239 for a description on how these interplay.
--
-- Since the pilot data has already been collected, this function re-encodes
-- the wrongly encoded Unicode characters, after which they are replaced with
-- corresponding HTML entities. For future data, this should not be necessary:
-- we should always use UTF-8, we should never auto-unescape, and we should
-- either always expect escaped characters, or understand Unicode.
fixEncoding :: [SqlRecord] -> IO [SqlRecord]
fixEncoding records = do
   forM records $ \record ->
      let input = addCodepoints . fixEntities . T.unpack . T.decodeLatin1 . Char8.pack $ record ! "input"
      in do
         --mapM putChar (filter (\x -> not $ x == '\10' || x >= ' ' || x <= '~') input)
         return . insert "input" input $ record

   where
   -- | Temporary solution to solve issue #240 & #253. The issue was that
   -- parseXML expects a string without actual Unicode characters - not a
   -- bytestring, but one that only contains Unicode code points nonetheless,
   -- which it will try to decode. This is wrong and should change in later
   -- versions of IDEAS, but for now we will encode the string to avoid issues.
   addCodepoints :: String -> String
   addCodepoints = id --WorkaroundUTF8.encode

   -- | Fix HTML entities DWO sent wrong. (see issue #242)
   fixEntities :: String -> String
   fixEntities = flip (foldr ($)) $ map (uncurry replace)
      [ ("&lt;;","&lt;") -- This is super hacky, but so is the problem itself
      , ("&lt","&lt;")
      ]


-- | Add identifying information that is not logged by Ideas. Clearly, this is
-- not an ideal solution, but it will do for now.
decorateRequests :: [SqlRecord] -> IO [SqlRecord]
decorateRequests = mapM $ \record ->
   let  input = record ! "input"
        (sID,tID) = fromJust . collectIDs $ input
        iIDs = intercalate "," . map fst . fromJust . collectInputs $ input
        rID = range [record ! "rowid"]
   in return
         . insert "inputs" iIDs
         . insert "studentid" sID
         . insert "taskid" tID
         . insert "original" rID
         $ record


-- | Translate student IDs to anonymized IDs.
anonymize :: [SqlRecord] -> IO [SqlRecord]
anonymize records = 
   let translation :: M.Map String String
       translation = M.fromList $ zip (nub $ map (! "studentid") records) (map show [1..])
   in forM records $ \record -> do
      let sID' = translation M.! (record ! "studentid")
      return $ insert "studentid" sID'
             . insert "input" (setID sID' $ record ! "input")
             . insert "output" (setID sID' $ record ! "output")
             $ record

   where
   setID :: String -> XML.XML -> XML.XML
   setID sID = XML.foldXML (\t as cs -> XML.makeXML t . mconcat $ map (mkAttr' sID) as ++ map (either XML.string XML.builder) cs) id id

   mkAttr' :: String -> XML.Attribute -> XML.XMLBuilder
   mkAttr' sID ("userid" XML.:= _) = "userid" XML..=. sID
   mkAttr' sID ("user" XML.:= _) = "user" XML..=. sID
   mkAttr' sID (n XML.:= a) = n XML..=. a

-- | Collapse subtasks, such that every student/task pair has only a single
-- request, namely, a merger of all requests for that pair.
collapseRequests :: [SqlRecord] -> IO [SqlRecord]
collapseRequests records = do
   --forM records $ \record -> do
   --   mapM (\x -> putStrLn $ show x ++ show (fromEnum x)) (record ! "input" :: String)
   --   print $ XML.parseXML (record ! "input")
   -- Extract decorated records and reinsert the XML
   let collapsed = map (uncurry $ insert "input") . M.elems
                 -- Map student/task pairs to decorated records
                 . foldl' (\m e -> M.insertWith merger (getKey e) e m) M.empty
                 -- Decorate each record with its XML, to avoid re-parsing it on every merge
                 . map ((! "input") &&& id)
                 $ records
   putStrLn $ "Collapsed " ++ show (length records) ++ " records into " ++ show (length collapsed) ++ "."
   return collapsed

   where
   merger (newXML, r1) (oldXML, r2) = (fromJust $ mergeRequest newXML oldXML, mergeIDs r1 r2)

   getKey :: (XML.XML, SqlRecord) -> (StudentID, TaskID)
   getKey = fromJust . collectIDs . fst

   -- keep the rightmost record but merge its id with the left record
   mergeIDs :: SqlRecord -> SqlRecord -> SqlRecord
   mergeIDs x y = insert "original" (x ! "original" <> y ! "original" :: NumberRange) x

   -- Merge two request XMLs such that the result contains the inputs of both,
   -- but is otherwise identical to the one in the first argument.
   mergeRequest :: Monad m => XML.XML -> XML.XML -> m XML.XML
   mergeRequest new old = do
      new' <- collectInputs new
      old' <- collectInputs old
      return . replaceInputs new
             . map (Right . snd)
             . sortBy (compare `on` fst)
             . nubBy ((==) `on` fst)
             $ new' ++ old'

mkAttr (n XML.:= a) = n XML..=. a
mkContent f = either XML.string (XML.builder . f)

-- | Auxiliary: Remove the inputs from a request and replace them with the
-- given inputs.
replaceInputs :: XML.XML -> [Either String XML.XML] -> XML.XML
replaceInputs xml inputs = XML.foldXML foldRequest id id xml

   where 

   mkXML :: XML.Name -> [XML.Attribute] -> [Either String XML.XML] -> XML.XML
   mkXML t as cs = XML.makeXML t . mconcat $ map mkAttr as ++ map (mkContent id) cs

   foldRequest "request" as cs = XML.makeXML "request" . mconcat $ map mkAttr as ++ map (mkContent $ XML.foldXML foldSolution id id) cs
   foldRequest t as cs = mkXML t as cs
   
   foldSolution "solution" as cs = XML.makeXML "solution" . mconcat $ map mkAttr as ++ map (mkContent id) inputs
   foldSolution t as cs = mkXML t as cs


-- | Trim requests that are empty or consist exclusively of whitespace.
trimEmptyRequests :: [SqlRecord] -> IO [SqlRecord]
trimEmptyRequests records = do
   let trimmed = [ insert "input" (replaceInputs request $ map Right inputs') record
                 | record <- records
                 , let request = record ! "input"
                 , inputs <- map snd <$> collectInputs request
                 , let inputs' = filter (not . isEmpty . XML.content) inputs
                 , not (null inputs') ]
   putStrLn $ "Trimmed " ++ show (length records) ++ " records into " ++ show (length trimmed)
   return trimmed

   where

   isEmpty :: [Either String XML.XML] -> Bool
   isEmpty (Left s:xs) = all isSpace s && isEmpty xs
   isEmpty (Right _:_) = False
   isEmpty [] = True


-- | Sometimes, tasks are updated just to the extent that the numbers change.
-- In order for the tests for those tasks to remain usable, we change the input
-- fo the pilot data in such a way that any number that would have been
-- recognized beforehand is changed to the new number. Of course, this is far
-- from bulletproof,  but the data will be worth more than if we didn't do it.
changeNumbersInOldRequests :: [SqlRecord] -> IO [SqlRecord]
changeNumbersInOldRequests = return . map maybeChange

   where

   maybeChange :: SqlRecord -> SqlRecord
   maybeChange r | condition "carrental" "2019-01-01" r = adjust "input" carrental . adjust "original" (++"*") $ r
                 | condition "matryoshka" "2019-01-01" r = adjust "input" matryoshka . adjust "original" (++"*") $ r
                 | otherwise = r

   condition :: TaskID -> String -> SqlRecord -> Bool
   condition tID day r =
      r ! "taskid" == tID &&
      r ! "time" < (fromJust $ parseTimeM False defaultTimeLocale "%Y-%m-%d" day :: UTCTime)

   carrental :: String -> String
   carrental string = foldr (uncurry replace) string
      [ ("93.8", "92.0")
      , ("93,8", "92,0")
      , ("19.5", "20.0")
      , ("19,5", "20,0")
      , ("42.95", "43.0")
      , ("42,95", "43,0")
      , ("23.45", "23.0")
      , ("23,45", "23,0")]

   matryoshka :: String -> String
   matryoshka string = replace " 6 " " 5 " string



-- | Fix input in Pépite database, where requests don't always conform to what
-- IDEAS expects (see issues #257 and #258). 
-- This is unreadable, but it's just a hack for the FR-201905* pilots
fixPepite :: [SqlRecord] -> IO [SqlRecord]
fixPepite records = do
   forM records $ \record -> do
      let input = record ! "input"
      source <- collectSource input
      if source /= "pepite"
         then return record
         else do
            (_, tID) <- collectIDs input
            inputs <- map snd <$> collectInputs input
            let inputs' = ($ inputs) $ stripv0 . 
                  case tID of
                     "makingasquare" -> transformIDs [("1","01")]
                     "matryoshka" -> transformIDs [("1","02")]
                     "carrental" -> transformIDs [("1","03")]
                     "pattern" -> transformIDs [("1","04")]
                     "magictrick" -> transformIDs [("1","05")]
                     "rectanglearea" -> transformIDs [("1","06a"), ("2","06b"), ("3","06c")] . separate
                     "theatrerate" -> transformIDs [("1","07a"),("3","07b")]
                     "areaandexpression" -> fixMatrix
                     "areaofatriangle" -> transformIDs [("1","09a"), ("2","09b"),("3","09c")] . separate
                     "vpattern" -> transformIDs [("1","10")]
            let input' = replaceInputs input $ map Right inputs'
            return $ insert "input" input' record


   where

   -- | Remove occurrences of the string "v0"
   stripv0 :: [XML.XML] -> [XML.XML]
   stripv0 = map (XML.foldXML (\t as cs -> XML.makeXML t . mconcat $ map mkAttr as ++ map (either (XML.string . replace "v0" "") XML.builder) cs) id id )

   -- Turn string like 000100010 into expected inputs. Third and last boolean are meaningless.
   fixMatrix :: [XML.XML] -> [XML.XML]
   fixMatrix [] = []
   fixMatrix [xml] = 
      let string = XML.getData xml 
      in map (\(i,v) -> XML.makeXML "input" . mconcat $ [ "id" XML..=. ['0','8',i], (if v == '1' then XML.string "true" else XML.string "false")]) $
            if length string == 9 && all (`elem` ['0','1']) string
               then zip ['a'..] (take 2 string ++ take 5 (drop 3 string)) 
               else []

   -- | Change names of inputs according to given translation.
   transformIDs :: [(InputID, InputID)] -> [XML.XML] -> [XML.XML]
   transformIDs ids = map $ \xml -> 
      let f t as cs = XML.makeXML t . mconcat $ map (mkAttr . g) as ++ map (mkContent id) cs
          g ("id" XML.:= i) = "id" XML.:= fromMaybe "no-identifier" (lookup i ids) 
          g other = other
      in XML.foldXML f id id xml

   -- | Separate inputs at the word "question" and give them a corresponding
   -- ID.
   separate :: [XML.XML] -> [XML.XML]
   separate xmls = 
      case xmls of
         [xml] -> 
            map (
               XML.makeXML (XML.name xml) 
               . mconcat 
               . (\(i,cs) -> ("id" XML..=. i) : map (either XML.string XML.builder) cs)
            )
            . reverse
            . sepAtQuestion [("1",[])]
            $ XML.content xml
         _ -> error "There shouldn't be more than one child to be separated"

      where
      sepAtQuestion :: [(InputID, [Either String XML.XML])] -> [Either String XML.XML] -> [(InputID, [Either String XML.XML])]
      sepAtQuestion all@((identifier,content):rest) elements = case elements of
         [] -> all
         Right x:xs -> sepAtQuestion ((identifier,content ++ [Right x]):rest) xs
         Left x:xs -> case strToLower x `splitOn` "question" of
            ("","") -> all
            ("", y') -> sepAtQuestion all (Left y':xs)
            (x', y') -> case dropWhile isSpace y' of
               '3':z' -> sepAtQuestion (("3",[]):(identifier,content ++ [Left x']):rest) (Left z':xs)
               '2':z' -> sepAtQuestion (("2",[]):(identifier,content ++ [Left x']):rest) (Left z':xs)
               '1':z' -> sepAtQuestion ((identifier,content ++ [Left x']):rest) (Left z':xs)
               z' -> sepAtQuestion ((identifier,content ++ [Left x']):rest) (Left z':xs)
               

-- | Process the records through the AdviseMe Domain Reasoner and update the
-- 'output' and 'evidence' columns accordingly.
--
-- TODO: Also update the responsetime column.
domainReasoner :: [SqlRecord] -> IO [SqlRecord]
domainReasoner = mapM $ \record -> do
   reply <- runDomainReasoner $ record ! "input"
   evidence <- 
      if hasError reply
         then do
            putStrLn $ "Warning: Ideas replied with an error: \n" ++ reply
            return mempty
         else do
            xmlReply <- either fail return $ XML.parseXML reply
            findEvidence xmlReply
   return . insert "output" reply
          . insert "evidence" evidence
          $ record

-- | Crude way of establishing whether IDEAS' reply contained an error.
hasError :: String -> Bool
hasError = ("<reply result=\"error\"" `isPrefixOf`)


-- | Replace evidence in records with evidence extracted from spreadsheets
-- (only if XLSX flag is set).
replaceEvidence :: [FilePath] -> [SqlRecord] -> IO [SqlRecord]
#ifdef XLSX
replaceEvidence paths records = do
   evidenceTable <- foldr (M.unionWith (<>)) mempty <$> mapM readEvidence paths
   return . flip map records $ \record ->
      maybe record (flip (insert "evidence") record) 
         (M.lookup (record ! "studentid", record ! "taskid") evidenceTable) 
#else
replaceEvidence = undefined
#endif


-- | Build the student model for every record.
studentModeller :: [SqlRecord] -> IO [SqlRecord]
studentModeller records = flip S.evalStateT mempty $
   forM records $ \r -> do
      let sID = r ! "studentid"    :: String
          tID = r ! "taskid"       :: String
          ev  = r ! "evidence"     :: Evidence
      otherPartialModels <- S.gets (M.findWithDefault mempty sID)
      let partialModel = taskConcepts tID ev
      let allPartialModels' = M.insert tID partialModel otherPartialModels
      let studentModel = aggregateConcepts (M.toList allPartialModels')
      S.modify $ M.insert sID allPartialModels'
      return . insert "studentmodel" studentModel 
             . insert "partialmodel" partialModel
             $ r


-- | Record all final models in a seperate table.
addModelsTable :: Connection -> [SqlRecord] -> IO ()
addModelsTable conn records = do
   dropTable conn modelsTable
   createTable conn modelsTable
   insertRecords conn modelsTable (map asModel finalRecords')

   where

   -- Only keep the final record of each student
   finalRecords'
      = filter (\r -> (/= "") . SQL.fromSql $ r ! "studentmodel")
      . reverse
      . nubBy ((==) `on` ((! "studentid") :: SqlRecord -> String))
      . reverse
      $ records

   nodeNames = map fst . evStates . SQL.fromSql $ head finalRecords' ! "studentmodel"

   -- Turn a student record into a model record
   asModel rec = M.fromList $
      ("studentid", rec ! "studentid") :
      (map (fmap SQL.toSql) . evStates . SQL.fromSql $ rec ! "studentmodel")

   modelsTable = SqlTable
      { tableName = "final_models"
      , columns = ("studentid", "TEXT") : map (\x -> (x, "REAL")) nodeNames
      }

#ifdef XLSX
-- | Record human assessments in a separate table.
writeHumanAssessmentTable :: Connection -> Tally -> IO ()
writeHumanAssessmentTable conn tally = do
   dropTable conn assessmentTable
   createTable conn assessmentTable
   insertRecords conn assessmentTable (map asMatch $ expectedPresence tally ++ expectedAbsence tally ++ mismatchState tally)

   where
   -- Turn a student record into a model record
   asMatch m = M.fromList $ 
      [("studentid", SQL.toSql $ locStudent m)
      , ("taskid", SQL.toSql $ locTask m)
      , ("nodeid", SQL.toSql $ locNode m)
      , ("expected", SQL.toSql $ expectation m)
      , ("observed", SQL.toSql $ observation m)] 
   
   assessmentTable = SqlTable
      { tableName = "assessment"
      , columns = [("studentid", "TEXT"), ("taskid", "TEXT"), ("nodeid", "TEXT"), ("expected","TEXT"), ("observed","TEXT")]
      }
#endif




-------------------------------------------------------------------------------

-- | Make simulated students out of a CSV file with columns containing evidence
-- for every (!) task.
simulateStudents :: FilePath -> IO [SqlRecord]
simulateStudents file = do
   putStrLn "Generating entries for simulated evidence..."
   (keys:values) <- readFileCSV file
   let sIDs = map (("simulation" ++) . show) [1 :: Int ..]
   let students = zip sIDs $ map (M.fromList . zip keys) values
   studentModeller
      [ insert "studentid" sID
      . insert "taskid" tID
      . insert "evidence" (row M.! tID)
      . insert "input" "generated student"
      $ defaultEntry [studentsTable, requestsTable]
      | tID <- filter (/= "rectanglearea_old") taskIDs
      , (sID, row) <- students ]