{-# language DeriveGeneric, OverloadedStrings, LambdaCase, DataKinds #-}
module Numeric.Datasets.Titanic (titanicRemote, titanicLocal, TitanicEntry(..), Class(..), Age(..), Sex(..))where
import Numeric.Datasets
import Data.Csv
import GHC.Generics
import Network.HTTP.Req ((/:), https, Scheme(..))
-- | One row of the Titanic dataset.
data TitanicEntry = TitanicEntry
  { tClass    :: Class  -- ^ Class the person was recorded under.
  , tAge      :: Age    -- ^ Recorded age, possibly missing (see 'Age').
  , tSex      :: Sex    -- ^ Recorded sex.
  , tSurvived :: Bool   -- ^ Survival flag.
  }
  deriving (Eq, Read, Show, Generic)
-- | Decode a row by column name: @Pclass@, @Age@, @Sex@ and @Survived@.
--
-- The @Pclass@, @Sex@ and @Survived@ columns are read as 'String's and then
-- converted with 'parseClass', 'parseSex' and 'parseBool'; those helpers call
-- 'error' on values they do not recognise, so such a value surfaces as an
-- exception when the corresponding field is evaluated rather than as a
-- parser failure.
instance FromNamedRecord TitanicEntry where
  parseNamedRecord record = do
    rawClass    <- record .: "Pclass"
    age         <- record .: "Age"
    rawSex      <- record .: "Sex"
    rawSurvived <- record .: "Survived"
    pure TitanicEntry
      { tClass    = parseClass rawClass
      , tAge      = age
      , tSex      = parseSex rawSex
      , tSurvived = parseBool rawSurvived
      }
-- | Class a person was recorded under. Constructor order is significant for
-- the derived 'Enum' and 'Bounded' instances.
data Class
  = First
  | Second
  | Third
  | Crew
  deriving (Eq, Read, Show, Generic, Enum, Bounded)
-- | Convert the textual class label (@"1"@, @"2"@, @"3"@ or @"Crew"@) into a
-- 'Class'. Partial: any other input calls 'error'.
parseClass :: String -> Class
parseClass "1"    = First
parseClass "2"    = Second
parseClass "3"    = Third
parseClass "Crew" = Crew
parseClass other  = error (unwords ["Unexpected feature value :", show other])
-- | A recorded age; 'Nothing' stands for a missing value.
newtype Age = Age (Maybe Double)
  deriving (Eq, Read, Show, Generic)
-- | Decode an age field. This parser never fails: the literal @NA@ and any
-- field that does not parse as a 'Double' both become @Age Nothing@.
instance FromField Age where
  parseField field
    | field == "NA" = pure (Age Nothing)
    | otherwise     =
        -- Run the Double parser purely so a failure can be mapped to Nothing
        -- instead of aborting the surrounding record parser.
        pure . Age . either (const Nothing) Just $
          runParser (parseField field :: Parser Double)
-- | Recorded sex. Constructor order is significant for the derived 'Enum'
-- and 'Bounded' instances.
data Sex
  = Female
  | Male
  deriving (Eq, Read, Show, Generic, Enum, Bounded)
-- | Convert the textual label (@"female"@ or @"male"@) into a 'Sex'.
-- Partial: any other input calls 'error'.
parseSex :: String -> Sex
parseSex "female" = Female
parseSex "male"   = Male
parseSex other    = error (unwords ["Unexpected feature value :", show other])
-- | Convert a @"1"@ / @"0"@ flag into a 'Bool'.
-- Partial: any other input calls 'error'.
parseBool :: String -> Bool
parseBool "1"   = True
parseBool "0"   = False
parseBool other = error (unwords ["Unexpected feature value :", show other])
-- | Titanic dataset sourced from
-- @https://raw.githubusercontent.com/JackStat/6003Data/master/Titanic.txt@.
--
-- The source is read via 'csvHdrDatasetSep' with a tab separator, and
-- 'removeEscQuotes' is installed as the preprocessing step.
titanicRemote :: Dataset TitanicEntry
titanicRemote =
  withPreprocess removeEscQuotes . csvHdrDatasetSep '\t' . URL $
    https "raw.githubusercontent.com"
      /: "JackStat"
      /: "6003Data"
      /: "master"
      /: "Titanic.txt"
-- | Titanic dataset sourced from the file @datafiles/titanic2_full.tsv@.
--
-- The source is read via 'csvHdrDatasetSep' with a tab separator, and
-- 'removeEscQuotes' is installed as the preprocessing step.
titanicLocal :: Dataset TitanicEntry
titanicLocal =
  withPreprocess
    removeEscQuotes
    (csvHdrDatasetSep '\t' (File "datafiles/titanic2_full.tsv"))