{-# LANGUAGE CPP, DataKinds, OverloadedStrings, QuasiQuotes, ScopedTypeVariables, TemplateHaskell, TypeApplications, TypeOperators #-} module Main (manualGeneration, main) where import Control.Exception (ErrorCall, catch) import Control.Monad (unless) import Data.Functor.Identity import Data.Char import qualified Data.Foldable as F import Data.List (find, isPrefixOf) import Data.Monoid (First(..)) import qualified Data.Text as T import qualified Language.Haskell.TH as TH import Language.Haskell.TH.Syntax (addDependentFile) import Frames import Frames.CSV (produceCSV) import Frames.CSV (defaultParser, produceTokens, defaultSep, readColHeaders) import qualified Chunks import DataCSV import Pipes.Prelude (toListM) import PrettyTH import System.IO (hClose) import System.IO.Temp (withSystemTempFile) import Test.Hspec as H import Test.HUnit.Lang (assertFailure) import qualified LatinTest as Latin import qualified Issue114 import qualified Issue145 import qualified NoHeader import qualified Categorical import qualified UncurryFold import qualified UncurryFoldNoHeader import qualified UncurryFoldPartialData import Data.Vinyl (xrec) -- | Extract all example @(CSV, generatedCode)@ pairs from -- @test/examples.toml@ csvTests :: [(CsvExample, String)] csvTests = $(do addDependentFile "test/examples.toml" csvExamples <- TH.runIO (examplesFrom "test/examples.toml") TH.ListE <$> mapM (\x@(CsvExample _ c _) -> [e|(x,$(generateCode "Row" c))|]) csvExamples) -- | Detect type-compatible re-used names and do not attempt to -- re-generate definitions for them. This does not do the right thing -- since the generated declarations are never turned into actual -- declarations, they do not affect the `lookupTypeName` call that is -- designed to prevent duplicate declarations. -- overlappingGeneration :: String -- overlappingGeneration = m ++ "\n\n" ++ e -- where m = $(do csvExamples <- TH.runIO (examplesFrom "test/examples.toml") -- let Just (CsvExample _ managers _) = -- find (\(CsvExample k _ _) -> k == "managers") csvExamples -- generateCode "ManagerRec" managers) -- e = $(do csvExamples <- TH.runIO (examplesFrom "test/examples.toml") -- let Just (CsvExample _ employees _) = -- find (\(CsvExample k _ _) -> k == "employees") csvExamples -- generateCode "EmployeeRec" employees) -- | To generate example generated code from raw CSV data, add the csv -- to @examples.toml@ and set the @generated@ key to an empty -- string. Then load this file into a REPL that has @:set -- -XTemplateHaskell@ and evaluate @putStrLn $(manualGeneration -- "employees")@, for example. -- -- Note that to load this file into a REPL may require some fiddling -- with the path to the examples file in the 'csvTests' splice above. manualGeneration :: String -> TH.Q TH.Exp manualGeneration k = do csvExamples <- TH.runIO (examplesFrom "test/examples.toml") maybe (error ("Table " ++ k ++ " not found")) (generateCode "Row") (getFirst $ foldMap aux csvExamples) where aux :: CsvExample -> First String aux (CsvExample k' c _) = if k == k' then pure c else mempty type ManagersRow = Record ["id" :-> Int, "manager" :-> Text, "age" :-> Int, "pay" :-> Double] type NoTruncateRow = Record ["id" :-> Int, "foo" :-> Int] newtype Code = Code String instance Show Code where show (Code x) = x instance Eq Code where Code a == Code b = clean a == clean b where clean = go (removePrefix moduleNames) . filter (not . isSpace) go _ [] = [] go f s@(c:cs) = case removePrefix moduleNames s of Nothing -> c : go f cs Just n -> go f (drop n s) moduleNames = [ "GHC.Maybe." ] removePrefix [] _ = Nothing removePrefix (w:ws) s | w `isPrefixOf` s = Just (length w) | otherwise = removePrefix ws s shouldBeWithinEpsilon :: Double -> Double -> Expectation shouldBeWithinEpsilon actual expected = unless (abs (actual - expected) < 1e-6) (assertFailure (show actual ++ " is not very close to the expected value " ++ show expected)) main :: IO () main = do hspec $ do describe "Haskell type generation" $ mapM_ (\(CsvExample k _ g, g') -> it k (Code g' `shouldBe` Code g)) csvTests -- describe "Multiple tables" $ -- do _g <- H.runIO $ -- generatedFrom "test/examples.toml" "managers_employees" -- it "Shouldn't duplicate columns" pending describe "Writing CSV Data" $ do let csvInput = case find ((== "managers") . name . fst) csvTests of Just (ex,_) -> csv ex Nothing -> error "Couldn't find managers test data" frame <- H.runIO $ withSystemTempFile "FramesSpecOutput" $ \fp h -> do hClose h writeFile fp csvInput inCoreAoS (readTable fp) :: IO (Frame ManagersRow) let Identity csvOutput = toListM (produceCSV frame) frame2 <- H.runIO $ withSystemTempFile "FramesSpecOutput" $ \fp h -> do hClose h writeFile fp (unlines csvOutput) inCoreAoS (readTable fp) :: IO (Frame ManagersRow) -- The test data isn't formatted quite how we'd do it: text -- fields aren't quoted, and salaries represented as Doubles -- do not have decimal points. -- let csvInput' = T.replace "Joe" "\"Joe\"" -- . T.replace "Sarah" "\"Sarah\"" -- . T.replace "\"80,000\"" "80000.0" -- $ T.pack csvInput let csvInput' = T.replace "\"80,000\"" "80000.0" (T.pack csvInput) it "Produces expected output" $ T.unlines (map T.pack csvOutput) `shouldBe` csvInput' it "Produces parseable output" $ unless (frame == frame2) (assertFailure "Reparsed CSV differs from input") describe "Latin1 Text Encoding" $ do managers <- H.runIO Latin.managers it "Parses" $ managers `shouldBe` ["João", "Esperança"] describe "Skip Missing Data" $ do let csvInput = case find ((== "NoTruncate") . name . fst) csvTests of Just (ex,_) -> csv ex Nothing -> error "Couldn't find NoTruncate test data" frameMissing <- H.runIO $ withSystemTempFile "FramesSpecOutput" $ \fp h -> do hClose h writeFile fp csvInput inCoreAoS (readTable fp) :: IO (Frame NoTruncateRow) it "Doesn't truncate after missing data" $ F.length frameMissing `shouldBe` 3 describe "Skip NA Data" $ do let csvInput = case find ((== "NoTruncateNA") . name . fst) csvTests of Just (ex,_) -> csv ex Nothing -> error "Couldn't find NoTruncateNA test data" frameMissing <- H.runIO $ withSystemTempFile "FramesSpecOutput" $ \fp h -> do hClose h writeFile fp csvInput inCoreAoS (readTable fp) :: IO (Frame NoTruncateRow) it "Doesn't truncate after missing data" $ F.length frameMissing `shouldBe` 4 describe "Parses Issue 114 Data" $ do fnames <- H.runIO Issue114.testNames it "Extracts facility_name" $ fnames `shouldBe` ["LILLIAN B. SMITH, ET AL", "MUSSER, B W \"B\""] describe "Supports user-supplied column names" $ do res0 <- H.runIO (NoHeader.getJobAndSchooling 0) it "Extracts job from row 0" $ fst <$> res0 `shouldBe` Just "gov.administrators" it "Extracts schooling from row 0" $ maybe 99 snd res0 `shouldBeWithinEpsilon` 13.11 res9 <- H.runIO (NoHeader.getJobAndSchooling 9) it "Extracts job from row 9" $ fst <$> res9 `shouldBe` Just "mining.engineers" it "Extracts schooling from row 9" $ maybe 99 snd res9 `shouldBeWithinEpsilon` 14.64 resWithHeader <- H.runIO UncurryFold.averageRatio resNoHeader <- H.runIO UncurryFoldNoHeader.averageRatio it "Produces identical numerics independent of column names" $ resWithHeader `shouldBe` resNoHeader describe "Can read into Maybe values" $ do (n,avg) <- H.runIO UncurryFoldPartialData.incomeOfUnknownPrestige it "Found the expected number of partial rows" $ n `shouldBe` 4 it "Computed the average income correctly" $ avg `shouldBe` 3344.5 describe "Can generate categorical types" $ do mSmall <- H.runIO Categorical.fifthMonthSmall it "Generates enumerated types for small cardinality sets" $ mSmall `shouldBe` Just Categorical.SmallMonthMay mLarge <- H.runIO Categorical.fifthMonthLarge it "Falls back to Text when the number of variants grows" $ mLarge `shouldBe` Just (T.pack "May") mCustom <- H.runIO Categorical.fifthMonthCustom it "Can parse into manually-specified categorical variables" $ mCustom `shouldBe` Just Categorical.MyMay it "Can generate categorical types with space" $ enumFrom (minBound :: Issue145.RowCategoryName) `shouldBe` [ Issue145.RowCategoryNameBarCategory , Issue145.RowCategoryNameFooCategory] describe "Detects parse failures" $ do caught <- H.runIO $ (runSafeT $ do _ <- readColHeaders @Columns defaultParser (produceTokens "test/data/multiline.csv" defaultSep) return False) `catch` \(_ :: ErrorCall) -> return True it "Fails on embedded newlines" caught describe "Chunking" $ do let everyTenthEducation = [13.11,12.39,15.97,12.79,12.09,11.13,8.5,7.64,8.78,6.92,10.0] inCoreChunks <- H.runIO Chunks.chunkInCore it "Can split in-memory data into chunks" $ inCoreChunks `shouldBe` everyTenthEducation streamedChunks <- H.runIO Chunks.chunkStream it "Can split an input stream into Frame chunks" $ streamedChunks `shouldBe` everyTenthEducation describe "Printing with take and drop" $ do let frame :: Frame (Record '[ "id" :-> Int, "manager" :-> Text , "age" :-> Int, "pay" :-> Double ]) frame = toFrame [ xrec (1, "Joe", 53, 80000) , xrec (2, "Sarah", 44, 80000) ] it "Can format a Frame to a String" $ showFrame "\t" frame `shouldBe` unlines [ "id\tmanager\tage\tpay" , "1\t\"Joe\"\t53\t80000.0" , "2\t\"Sarah\"\t44\t80000.0" ] it "Can take the first row" $ showFrame "\t" (takeRows 1 frame) `shouldBe` unlines [ "id\tmanager\tage\tpay" , "1\t\"Joe\"\t53\t80000.0" ] it "Can drop the first row" $ showFrame "\t" (dropRows 1 frame) `shouldBe` unlines [ "id\tmanager\tage\tpay" , "2\t\"Sarah\"\t44\t80000.0" ]