module Numeric.Datasets.BreastCancerWisconsin where
import Numeric.Datasets
import Data.Csv
import GHC.Generics
import Control.Applicative
data Diagnosis = Malignant | Benign deriving (Show, Read, Eq, Generic)
data Prognosis = Recurrent | Nonrecurrent deriving (Show, Read, Eq, Generic)
intToDiagnosis :: Int -> Diagnosis
intToDiagnosis 2 = Benign
intToDiagnosis 4 = Malignant
intToDiagnosis _ = error "unknown diagnosis code"
data BreastCancerEntry = BreastCancerEntry
{ sampleCodeNumber :: Int
, clumpThickness :: Int
, uniformityCellSize :: Int
, uniformityCellShape :: Int
, marginalAdhesion :: Int
, singleEpithelialCellSize :: Int
, bareNuclei :: Maybe Int
, blandChromatin :: Int
, normalNucleoli :: Int
, mitosis :: Int
, sampleClass :: Diagnosis
} deriving (Show, Read, Generic)
instance FromRecord BreastCancerEntry where
parseRecord v = BreastCancerEntry <$> v .! 0 <*> v .! 1 <*> v .! 2 <*> v .! 3 <*> v .! 4 <*> v .! 5 <*> (v .! 6 <|> return Nothing) <*> v .! 7 <*> v .! 8 <*> v .! 9 <*> (intToDiagnosis <$> v .! 10)
breastCancerDatabase :: Dataset BreastCancerEntry
breastCancerDatabase = csvDataset
$ URL "http://mlr.cs.umass.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data"
data DiagnosticBreastCancer = DiagnosticBreastCancer
{ diagnosticID :: Int
, diagnosis :: Diagnosis
, diagnosticCells :: CellFeatures
} deriving (Show, Read, Generic)
data PrognosticBreastCancer = PrognosticBreastCancer
{ prognosticID :: Int
, prognosis :: Prognosis
, prognosticCells :: CellFeatures
} deriving (Show, Read, Generic)
data CellFeatures = CellFeatures
{ radius :: Double
, perimeter :: Double
, area :: Double
, smoothness :: Double
, compactness :: Double
, concavity :: Double
, concavePoints :: Double
, symmetry :: Double
, fractalDimension :: Double
} deriving (Show, Read, Generic)
charToDiagnosis :: String -> Diagnosis
charToDiagnosis "M" = Malignant
charToDiagnosis "B" = Benign
charToDiagnosis _ = error "unknown diagnosis"
charToPrognosis :: String -> Prognosis
charToPrognosis "N" = Nonrecurrent
charToPrognosis "R" = Recurrent
charToPrognosis _ = error "unknown diagnosis"
instance FromRecord DiagnosticBreastCancer where
parseRecord v = DiagnosticBreastCancer <$> v .! 0 <*> (charToDiagnosis <$> v .! 1) <*> parseRecord v
instance FromRecord PrognosticBreastCancer where
parseRecord v = PrognosticBreastCancer <$> v .! 0 <*> (charToPrognosis <$> v .! 1) <*> parseRecord v
instance FromRecord CellFeatures where
parseRecord v = CellFeatures <$> v .! 2 <*> v .! 3 <*> v .! 4 <*> v .! 5 <*> v .! 6 <*> v .! 7 <*> v .! 8 <*> v .! 9 <*> v .! 10
diagnosticBreastCancer :: Dataset DiagnosticBreastCancer
diagnosticBreastCancer = csvDataset
$ URL "http://mlr.cs.umass.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
prognosticBreastCancer :: Dataset PrognosticBreastCancer
prognosticBreastCancer = csvDataset
$ URL "http://mlr.cs.umass.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wpbc.data"