{-# LANGUAGE DeriveGeneric, OverloadedStrings, LambdaCase, DataKinds #-} {- | Mushroom data set UCI ML Repository link This data set includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms in the Agaricus and Lepiota Family. Each species is identified as definitely edible, definitely poisonous, or of unknown edibility and not recommended. This latter class was combined with the poisonous one. Attribute Information: All the attributes are discrete (categorical), and attribute 11 ("stalk-root") lacks some entries. The 'classification' attribute has been mapped to a Boolean value (where edible = True). 0. classification: poisonous=p, edible=e 1. cap-shape: bell=b,conical=c,convex=x,flat=f, knobbed=k,sunken=s 2. cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s 3. cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r, pink=p,purple=u,red=e,white=w,yellow=y 4. bruises?: bruises=t,no=f 5. odor: almond=a,anise=l,creosote=c,fishy=y,foul=f, musty=m,none=n,pungent=p,spicy=s 6. gill-attachment: attached=a,descending=d,free=f,notched=n 7. gill-spacing: close=c,crowded=w,distant=d 8. gill-size: broad=b,narrow=n 9. gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g, green=r,orange=o,pink=p,purple=u,red=e, white=w,yellow=y 10. stalk-shape: enlarging=e,tapering=t 11. stalk-root: bulbous=b,club=c,cup=u,equal=e, rhizomorphs=z,rooted=r,missing=? 12. stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s 13. stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s 14. stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o, pink=p,red=e,white=w,yellow=y 15. stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o, pink=p,red=e,white=w,yellow=y 16. veil-type: partial=p,universal=u 17. veil-color: brown=n,orange=o,white=w,yellow=y 18. ring-number: none=n,one=o,two=t 19. ring-type: cobwebby=c,evanescent=e,flaring=f,large=l, none=n,pendant=p,sheathing=s,zone=z 20. spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r, orange=o,purple=u,white=w,yellow=y 21. population: abundant=a,clustered=c,numerous=n, scattered=s,several=v,solitary=y 22. habitat: grasses=g,leaves=l,meadows=m,paths=p, urban=u,waste=w,woods=d Example rows: p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g -} module Numeric.Datasets.Mushroom ( mushroom , MushroomEntry(..) , CapShape(..), CapSurface(..), CapColor(..), Odor(..) , GillAttachment(..), GillSpacing(..), GillSize(..), GillColor(..), StalkShape(..) , StalkRoot(..), StalkSurfaceAboveRing(..), StalkSurfaceBelowRing(..) , StalkColorAboveRing(..), StalkColorBelowRing(..), VeilType(..), VeilColor(..) , RingNumber(..), RingType(..), SporePrintColor(..), Population(..), Habitat(..) ) where import Numeric.Datasets import Data.Csv import GHC.Generics -- import Control.Applicative import Network.HTTP.Req ((/:), Scheme(..)) data MushroomEntry = MushroomEntry { edible :: Bool -- ^ Is the mushroom edible? , capShape :: CapShape , capSurface :: CapSurface , capColor :: CapColor , bruises :: Bool , odor :: Odor , gillAttachment :: GillAttachment , gillSpacing :: GillSpacing , gillSize :: GillSize , gillColor :: GillColor , stalkShape :: StalkShape , stalkRoot :: Maybe StalkRoot , stalkSurfaceAboveRing :: StalkSurfaceAboveRing , stalkSurfaceBelowRing :: StalkSurfaceBelowRing , stalkColorAboveRing :: StalkColorAboveRing , stalkColorBelowRing :: StalkColorBelowRing , veilType :: VeilType , veilColor :: VeilColor , ringNumber :: RingNumber , ringType :: RingType , sporePrintColor :: SporePrintColor , population :: Population , habitat :: Habitat } deriving (Show, Read, Generic) instance FromRecord MushroomEntry where parseRecord v = MushroomEntry <$> (charToClassification <$> v .! 0) <*> (charToCapShape <$> v .! 1) <*> (charToCapSurface <$> v .! 2) <*> (charToCapColor <$> v .! 3) <*> (charToBruises <$> v .! 4) <*> (charToOdor <$> v .! 5) <*> (charToGillAttachment <$> v .! 6) <*> (charToGillSpacing <$> v .! 7) <*> (charToGillSize <$> v .! 8) <*> (charToGillColor <$> v .! 9) <*> (charToStalkShape <$> v .! 10) <*> (charToStalkRoot <$> v .! 11) <*> (charToSsar <$> v .! 12) <*> (charToSsbr <$> v .! 13) <*> (charToScar <$> v .! 14) <*> (charToScbr <$> v .! 15) <*> (charToVeilType <$> v .! 16) <*> (charToVeilColor <$> v .! 17) <*> (charToRingNumber <$> v .! 18) <*> (charToRingType <$> v .! 19) <*> (charToSporePrintColor <$> v .! 20) <*> (charToPopulation <$> v .! 21) <*> (charToHabitat <$> v .! 22) -- | Whether a mushroom sample is poisonous or edible; the classification tasks involves predicting this label. -- data Classification = Poisonous | Edible deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToClassification :: Char -> Bool charToClassification = \case 'p' -> False 'e' -> True x -> error $ unwords ["Unexpected feature value :", show x] data CapShape = Bell | Conical | Convex | Flat | Knobbed | Sunken deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToCapShape :: Char -> CapShape charToCapShape = \case 'b' -> Bell 'c' -> Conical 'x' -> Convex 'f' -> Flat 'k' -> Knobbed 's' -> Sunken x -> error $ unwords ["Unexpected feature value :", show x] data CapSurface = CSFibrous | CSGrooves | CSScaly | CSSmooth deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToCapSurface :: Char -> CapSurface charToCapSurface = \case 'f' -> CSFibrous 'g' -> CSGrooves 'y' -> CSScaly 's' -> CSSmooth x -> error $ unwords ["Unexpected feature value :", show x] data CapColor = CCBrown | CCBuff | CCCinnamon | CCGray | CCGreen | CCPink | CCPurple | CCRed | CCWhite | CCYellow deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToCapColor :: Char -> CapColor charToCapColor = \case 'n' -> CCBrown 'b' -> CCBuff 'c' -> CCCinnamon 'g' -> CCGray 'r' -> CCGreen 'p' -> CCPink 'u' -> CCPurple 'e' -> CCRed 'w' -> CCWhite 'y' -> CCYellow x -> error $ unwords ["Unexpected feature value :", show x] charToBruises :: Char -> Bool charToBruises c = case c of 't' -> True 'f' -> False x -> error $ unwords ["Unexpected feature value :", show x] data Odor = Almond | Anise | Creosote | Fishy | Foul | Musty | None | Pungent | Spicy deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToOdor :: Char -> Odor charToOdor = \case 'a' -> Almond 'l' -> Anise 'c' -> Creosote 'y' -> Fishy 'f' -> Foul 'm' -> Musty 'n' -> None 'p' -> Pungent 's' -> Spicy x -> error $ unwords ["Unexpected feature value :", show x] data GillAttachment = Attached | Descending | Free | Notched deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToGillAttachment :: Char -> GillAttachment charToGillAttachment = \case 'a' -> Attached 'd' -> Descending 'f' -> Free 'n' -> Notched x -> error $ unwords ["Unexpected feature value :", show x] data GillSpacing = Close | Crowded | Distant deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToGillSpacing :: Char -> GillSpacing charToGillSpacing = \case 'c' -> Close 'w' -> Crowded 'd' -> Distant x -> error $ unwords ["Unexpected feature value :", show x] data GillSize = Broad | Narrow deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToGillSize :: Char -> GillSize charToGillSize = \case 'b' -> Broad 'n' -> Narrow x -> error $ unwords ["Unexpected feature value :", show x] data GillColor = GCBlack | GCBrown | GCBuff | GCChocolate | GCGray | GCGreen | GCOrange | GCPink | GCPurple | GCRed | GCWhite | GCYellow deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToGillColor :: Char -> GillColor charToGillColor = \case 'k' -> GCBlack 'n' -> GCBrown 'b' -> GCBuff 'h' -> GCChocolate 'g' -> GCGray 'r' -> GCGreen 'o' -> GCOrange 'p' -> GCPink 'u' -> GCPurple 'e' -> GCRed 'w' -> GCYellow x -> error $ unwords ["Unexpected feature value :", show x] data StalkShape = Enlarging | Tapering deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToStalkShape :: Char -> StalkShape charToStalkShape = \case 'e' -> Enlarging 't' -> Tapering x -> error $ unwords ["Unexpected feature value :", show x] data StalkRoot = Bulbous | Club | Cup | Equal | Rhizomorphs | Rooted deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToStalkRoot :: Char -> Maybe StalkRoot charToStalkRoot c = case c of 'b' -> Just Bulbous 'c' -> Just Club 'u' -> Just Cup 'e' -> Just Equal 'z' -> Just Rhizomorphs 'r' -> Just Rooted '?' -> Nothing x -> error $ unwords ["Unexpected feature value :", show x] data StalkSurfaceAboveRing = SSARFibrous | SSARScaly | SSARSilky | SSARSmooth deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToSsar :: Char -> StalkSurfaceAboveRing charToSsar = \case 'f' -> SSARFibrous 'y' -> SSARScaly 'k' -> SSARSilky 's' -> SSARSmooth x -> error $ unwords ["Unexpected feature value :", show x] data StalkSurfaceBelowRing = SSBRFibrous | SSBRScaly | SSBRSilky | SSBRSmooth deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToSsbr :: Char -> StalkSurfaceBelowRing charToSsbr = \case 'f' -> SSBRFibrous 'y' -> SSBRScaly 'k' -> SSBRSilky 's' -> SSBRSmooth x -> error $ unwords ["Unexpected feature value :", show x] data StalkColorAboveRing = SCARBrown | SCARBuff | SCARCinnamon | SCARGray | SCAROrange | SCARPink | SCARRed | SCARWhite | SCARYellow deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToScar :: Char -> StalkColorAboveRing charToScar = \case 'n' -> SCARBrown 'b' -> SCARBuff 'c' -> SCARCinnamon 'g' -> SCARGray 'o' -> SCAROrange 'p' -> SCARPink 'e' -> SCARRed 'w' -> SCARWhite 'y' -> SCARYellow x -> error $ unwords ["Unexpected feature value :", show x] data StalkColorBelowRing = SCBRBrown | SCBRBuff | SCBRCinnamon | SCBRGray | SCBROrange | SCBRPink | SCBRRed | SCBRWhite | SCBRYellow deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToScbr :: Char -> StalkColorBelowRing charToScbr = \case 'n' -> SCBRBrown 'b' -> SCBRBuff 'c' -> SCBRCinnamon 'g' -> SCBRGray 'o' -> SCBROrange 'p' -> SCBRPink 'e' -> SCBRRed 'w' -> SCBRWhite 'y' -> SCBRYellow x -> error $ unwords ["Unexpected feature value :", show x] data VeilType = Partial | Universal deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToVeilType :: Char -> VeilType charToVeilType = \case 'p' -> Partial 'u' -> Universal x -> error $ unwords ["Unexpected feature value :", show x] data VeilColor = VCBrown | VCOrange | VCWhite | VCYellow deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToVeilColor :: Char -> VeilColor charToVeilColor = \case 'n' -> VCBrown 'o' -> VCOrange 'w' -> VCWhite 'y' -> VCYellow x -> error $ unwords ["Unexpected feature value :", show x] -- | 18. ring-number: none=n,one=o,two=t data RingNumber = RNNone | RNOne | RNTwo deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToRingNumber :: Char -> RingNumber charToRingNumber = \case 'n' -> RNNone 'o' -> RNOne 't' -> RNTwo x -> error $ unwords ["Unexpected feature value :", show x] data RingType = RTCobwebby | RTEvanescent | RTFlaring | RTLarge | RTNone | RTPendant | RTSheathing | RTZone deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToRingType :: Char -> RingType charToRingType = \case 'c' -> RTCobwebby 'e' -> RTEvanescent 'f' -> RTFlaring 'l' -> RTLarge 'n' -> RTNone 'p' -> RTPendant 's' -> RTSheathing 'z' -> RTZone x -> error $ unwords ["Unexpected feature value :", show x] data SporePrintColor = SPCBlack | SPCBrown | SPCBuff | SPCChocolate | SPCGreen | SPCOrange | SPCPurple | SPCWhite | SPCYellow deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToSporePrintColor :: Char -> SporePrintColor charToSporePrintColor = \case 'k' -> SPCBlack 'n' -> SPCBrown 'b' -> SPCBuff 'h' -> SPCChocolate 'r' -> SPCGreen 'o' -> SPCOrange 'u' -> SPCPurple 'w' -> SPCWhite 'y' -> SPCYellow x -> error $ unwords ["Unexpected feature value :", show x] data Population = Abundant | Clustered | Numerous | Scattered | Several | Solitary deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToPopulation :: Char -> Population charToPopulation = \case 'a' -> Abundant 'c' -> Clustered 'n' -> Numerous 's' -> Scattered 'v' -> Several 'y' -> Solitary x -> error $ unwords ["Unexpected feature value :", show x] data Habitat = Grasses | Leaves | Meadows | Paths | Urban | Waste | Woods deriving (Eq, Read, Show, Ord, Enum, Bounded, Generic) charToHabitat :: Char -> Habitat charToHabitat = \case 'g' -> Grasses 'l' -> Leaves 'm' -> Meadows 'p' -> Paths 'u' -> Urban 'w' -> Waste 'd' -> Woods x -> error $ unwords ["Unexpected feature value :", show x] mushroom :: Dataset MushroomEntry mushroom = csvDataset $ URL $ uciMLDB /: "mushroom" /: "agaricus-lepiota.data"