module Numeric.Datasets.Adult where
import Numeric.Datasets
import Data.Csv
import GHC.Generics
import Control.Applicative
import Data.Text (Text, strip)
data WorkClass = Private | SelfEmpNotInc | SelfEmpInc | FederalGov
| LocalGov | StateGov | WithoutPay | NeverWorked
deriving (Show, Read, Eq, Generic)
instance FromField WorkClass where
parseField = parseDashToCamelField
data MaritalStatus = MarriedCivSpouse | Divorced | NeverMarried
| Separated | Widowed | MarriedSpouseAbsent | MarriedAFSpouse
deriving (Show, Read, Eq, Generic)
instance FromField MaritalStatus where
parseField s = parseDashToCamelField s
data Occupation = TechSupport | CraftRepair | OtherService | Sales | ExecManagerial | ProfSpecialty
| HandlersCleaners | MachineOpInspct | AdmClerical | FarmingFishing | TransportMoving
| PrivHouseServ | ProtectiveServ | ArmedForces
deriving (Show, Read, Eq, Generic)
instance FromField Occupation where
parseField s = parseDashToCamelField s
data Relationship = Wife | OwnChild | Husband | NotInFamily | OtherRelative | Unmarried
deriving (Show, Read, Eq, Generic)
instance FromField Relationship where
parseField s = parseDashToCamelField s
data Race = White | AsianPacIslander | AmerIndianEskimo | Other | Black
deriving (Show, Read, Eq, Generic)
instance FromField Race where
parseField s = parseDashToCamelField s
data Sex = Female | Male
deriving (Show, Read, Eq, Generic)
instance FromField Sex where
parseField s = parseDashToCamelField s
data Income = GT50K | LE50K
deriving (Show, Read, Eq, Generic)
instance FromField Income where
parseField " >50K" = pure GT50K
parseField " <=50K" = pure LE50K
parseField " >50K." = pure GT50K
parseField " <=50K." = pure LE50K
parseField ">50K" = pure GT50K
parseField "<=50K" = pure LE50K
parseField _ = fail "unknown income"
data Adult = Adult
{ age :: Int
, workClass :: Maybe WorkClass
, finalWeight :: Int
, education :: Text
, educationNum :: Int
, maritalStatus :: MaritalStatus
, occupation :: Maybe Occupation
, relationship :: Relationship
, race :: Race
, sex :: Sex
, capitalGain :: Int
, capitalLoss :: Int
, hoursPerWeek :: Int
, nativeCountry :: Text
, income :: Income
} deriving (Show, Read, Generic)
instance FromRecord Adult where
parseRecord v = Adult <$> v .! 0 <*> (v.! 1 <|> return Nothing) <*> v.!2 <*> (strip <$> v.!3)
<*> v.!4 <*> v.!5<*> (v.!6 <|> return Nothing) <*> v.!7 <*> v.!8
<*> v.!9 <*> v.!10 <*> v.!11 <*> v.!12<*> v.!13<*> v.!14
adult :: Dataset Adult
adult = csvDataset $ URL "http://mlr.cs.umass.edu/ml/machine-learning-databases/adult/adult.data"
adultTestSet :: Dataset Adult
adultTestSet = csvDatasetPreprocess (dropLines 1) $ URL "http://mlr.cs.umass.edu/ml/machine-learning-databases/adult/adult.test"