module Database.HumanAssessment
( Tally(..)
, Match(..)
, prettyMatches
, summarise
, summariseTasks
, score
, tally
, tally'
, getSpreadsheet
, getEvidence
, getHumanMachineFiles
, readEvidence
) where
import Data.List ( sortBy )
import Data.Function ( on )
import Data.Maybe ( fromJust, isJust, fromMaybe )
import Data.Char ( isDigit )
import Data.Semigroup ( Semigroup, (<>) )
import Data.Text ( unpack )
import Control.Monad ( forM, when )
import Control.Lens ( (^.) )
import Text.Printf ( printf )
import System.FilePath ( (</>), (<.>), takeBaseName, takeExtension )
import System.Directory ( getDirectoryContents )
import Text.PrettyPrint.Leijen ( pretty, (<+>), (<$$>) )
import qualified Data.ByteString.Lazy as BS
import qualified Data.Map as M
import qualified Codec.Xlsx as XLSX
import qualified Database.HDBC.Sqlite3 as SQL ( connectSqlite3 )
import qualified Text.PrettyPrint.Leijen as PP
import Util.List ( index )
import Util.String ( split, trim, normalize, percentage, (//) )
import Database.Data ( PilotID, StudentID, TaskID, NodeID, allEvidence )
import Bayes.Evidence ( Evidence, hardEvidence, setId )
import Main.Tasks ( findTaskFuzzy, taskNetwork )
import Bayes.Network ( findNodeFuzzy, states, Node, nodeId, state2label, label2state )
import Recognize.Data.MathStoryProblem ( Task(Task), singleNetwork )
getHumanMachineFiles :: FilePath -> FilePath -> IO [(PilotID, FilePath, FilePath)]
getHumanMachineFiles machineDir humanDir = do
files <- filter ((== ".xlsx") . takeExtension) <$> getDirectoryContents humanDir
forM files $ \file -> do
let pilot = takeBaseName file
let spreadsheet = humanDir </> pilot <.> "xlsx"
let database = machineDir </> pilot <.> "db"
return (pilot, spreadsheet, database)
data Match = Match
{ locStudent :: StudentID
, locTask :: TaskID
, locNode :: NodeID
, expectation :: Maybe String
, observation :: Maybe String
}
data Tally = Tally
{ matchState :: [Match]
, matchAbsence :: [Match]
, mismatchState :: [Match]
, expectedPresence :: [Match]
, expectedAbsence:: [Match]
}
instance Monoid Tally where
mempty = Tally [] [] [] [] []
mappend = (<>)
instance Semigroup Tally where
x <> y = Tally
{ matchState = matchState x ++ matchState y
, matchAbsence = matchAbsence x ++ matchAbsence y
, mismatchState = mismatchState x ++ mismatchState y
, expectedPresence = expectedPresence x ++ expectedPresence y
, expectedAbsence = expectedAbsence x ++ expectedAbsence y
}
instance PP.Pretty Match where
pretty m =
PP.fill 35 (pretty $ locNode m) <+>
PP.fill 10 (pretty $ locStudent m) <+>
PP.fill 16 (maybe (pretty "empty") (PP.dquotes . pretty) (expectation m)) <+>
maybe (pretty "empty") (PP.dquotes . pretty) (observation m)
prettyMatches :: [Match] -> PP.Doc
prettyMatches matches =
PP.fill 35 (pretty "NODE") <+>
PP.fill 10 (pretty "STUDENT") <+>
PP.fill 16 (pretty "HUMAN/EXPECTED") <+>
pretty "MACHINE/OBSERVED" <$$>
(PP.vsep . map pretty $ matches)
instance PP.Pretty Tally where
pretty t =
pretty "Match" <+> perc totalMatch totalTally <$$> (PP.indent 4 $
pretty "States match" <+> perc (length . matchState) totalMatch <$$>
pretty "Agree on absence" <+> perc (length . matchAbsence) totalMatch
) <$$>
pretty "Mismatch" <+> perc totalMismatch totalTally <$$> (PP.indent 4 $
pretty "States mismatch" <+> perc (length . mismatchState) totalMismatch <$$>
pretty "Expected presence" <+> perc (length . expectedPresence) totalMismatch <$$>
pretty "Expected absence" <+> perc (length . expectedAbsence) totalMismatch
) <$$>
pretty "Severe mismatches:" <+> PP.text (percentage 1 (score (\t' -> length $ mismatchState t' ++ expectedAbsence t') totalTally t))
where
perc f g =
pretty (f t) <+> pretty "of" <+> pretty (g t) <+>
PP.parens (PP.text $ percentage 1 (score f g t))
totalMatch :: Tally -> Int
totalMatch t = length (matchState t) + length (matchAbsence t)
totalMismatch :: Tally -> Int
totalMismatch t = length (mismatchState t) + length (expectedAbsence t) + length (expectedPresence t)
totalTally :: Tally -> Int
totalTally t = totalMatch t + totalMismatch t
score :: (Tally -> Int) -> (Tally -> Int) -> Tally -> Double
score field1 field2 t = field1 t // field2 t
type State = Maybe String
getState :: M.Map (StudentID, TaskID) Evidence
-> StudentID -> TaskID -> NodeID -> State
getState db sID tID nID =
M.lookup (sID, tID) db >>= lookup nID . hardEvidence >>= return . state2label taskNetwork nID
(!) :: XLSX.CellMap -> (Int,Int) -> Maybe String
cells ! rowcol = trim . toString <$> (M.lookup rowcol cells >>= (^. XLSX.cellValue))
where
toString :: XLSX.CellValue -> String
toString (XLSX.CellText t) = unpack t
toString (XLSX.CellDouble d) = printf "%.f" d
toString t = error $ "not a text cell: " ++ show t
canonicalNode :: TaskID -> NodeID -> Node ()
canonicalNode tID nID = either error id $ do
Task t <- findTaskFuzzy tID
findNodeFuzzy (singleNetwork t) nID
getWorksheets :: XLSX.Xlsx -> [(TaskID, XLSX.CellMap)]
getWorksheets spreadsheet =
[ (tID, cells)
| (sheetname, worksheet) <- spreadsheet ^. XLSX.xlSheets
, let cells = worksheet ^. XLSX.wsCells
, let tID = normalize . unpack $ sheetname
]
getNodes :: XLSX.CellMap -> [(NodeID, Int)]
getNodes cells =
[ (nodeID_noncanonical, row)
| (row, col) <- M.keys cells
, row >= 3
, col == 1
, let value = cells ! (row,col)
, isJust value
, let nodeID_noncanonical = fromJust value
]
getStudents :: XLSX.CellMap -> [(StudentID, Int)]
getStudents cells =
[ (sID, col)
| (row, col) <- M.keys cells
, row == 1
, col >= 3
, let value = cells ! (row,col)
, isJust value
, let sID = fromJust value
]
getStates :: TaskID -> XLSX.CellMap -> [((StudentID, NodeID), State)]
getStates tID cells =
[ ((sID, nodeId node), cells ! (row, col))
| (sID, col) <- getStudents cells
, (nodeID_noncanonical, row) <- getNodes cells
, let node = canonicalNode tID nodeID_noncanonical
]
getEvidence :: XLSX.Xlsx -> M.Map (StudentID, TaskID) Evidence
getEvidence spreadsheet = M.fromListWith (<>) $
[ ((sID, tID), maybe mempty (\state -> setId nodeID (label2state taskNetwork nodeID state) mempty) expected )
| (tID, cells) <- getWorksheets spreadsheet
, ((sID, nodeID), expected) <- getStates tID cells
]
readEvidence :: FilePath -> IO (M.Map (StudentID, TaskID) Evidence)
readEvidence path = getEvidence . XLSX.toXlsx <$> BS.readFile path
getSpreadsheet :: FilePath -> IO XLSX.Xlsx
getSpreadsheet path = XLSX.toXlsx <$> BS.readFile path
tally :: FilePath -> FilePath -> IO (M.Map (TaskID, NodeID) Tally)
tally spreadsheetFile databaseFile = do
spreadsheet <- getSpreadsheet spreadsheetFile
allObserved <- SQL.connectSqlite3 databaseFile >>= allEvidence
tally' spreadsheet allObserved
tally' :: XLSX.Xlsx -> M.Map (StudentID,TaskID) Evidence -> IO (M.Map (TaskID, NodeID) Tally)
tally' spreadsheet allObserved = do
return . M.fromListWith (<>) $
[ (,) (tID, nodeID) $ categorise match
| (tID, cells) <- getWorksheets spreadsheet
, ((sID, nodeID), expected) <- getStates tID cells
, let observed = getState allObserved sID tID nodeID
, let match = Match { locStudent = sID, locTask = tID, locNode = nodeID, observation = observed, expectation = expected }
]
where
categorise :: Match -> Tally
categorise match = case expectation match of
Just e -> case observation match of
Just o -> if e == o then mempty { matchState = [match] } else mempty { mismatchState = [match] }
Nothing -> mempty { expectedPresence = [match] }
Nothing -> case observation match of
Just _ -> mempty { expectedAbsence = [match] }
Nothing -> mempty { matchAbsence = [match] }
summariseTasks :: M.Map (TaskID, NodeID) Tally -> M.Map TaskID Tally
summariseTasks = M.mapKeysWith (<>) fst
summarise :: Foldable t => t Tally -> Tally
summarise = foldl (<>) mempty