{-| Code to read input data files containing the test inputs and test outputs used to evaluate the fitness of individuals. Author: mjsottile\@computer.org NOTE: Parsec code for CSV files -} module GEP.Examples.Regression.FitnessInput ( readFitnessInput ) where import Text.ParserCombinators.Parsec import System.Exit -- -- assume files have CSV format with a header row where each entry in the -- header row names a variable. note that currently we require these to -- be single characters. eventually we may automate the process of mapping -- variables onto characters in the genome to allow more expressive names -- to be associated with variables. -- -- PARSEC STUFF csvfile = many csvline csvline = do entries <- (sepBy entry (char ',')) newline return entries -- entry accepts any string containing alphanum or periods, with spaces either -- before or after the value. entry = do many (char ' ') body <- many (noneOf ",\n") many (char ' ') return body -- END PARSEC STUFF type FitnessDict = [[(Char,Double)]] dictify :: [String] -> [[String]] -> (FitnessDict, [Double]) dictify lbls values = (map (\j -> zip (init charLbls) j) (init floatValues), map last floatValues) where charLbls = map head lbls floatValues = map (\j -> map (\i -> (read i) :: Double) j) values -- function that takes a filename and returns a dictionary readFitnessInput :: String -> IO (FitnessDict,[Double]) readFitnessInput fname = do result <- parseFromFile csvfile fname case result of Left err -> do putStrLn "Bad regression fitness input!" exitFailure Right xs -> do return $ dictify (head xs) (tail xs) {- main :: IO () main = do x <- readFitnessInput "test.csv" print x -}