box-csv-0.2.0: CSV parsing in a box.
Safe HaskellNone
LanguageHaskell2010

Box.Csv

Contents

Description

A csv process based on attoparsec and the box library

Synopsis

Documentation

data CsvConfig Source #

csv file configuration

Constructors

CsvConfig 

Fields

Instances

Instances details
Eq CsvConfig Source # 
Instance details

Defined in Box.Csv

Show CsvConfig Source # 
Instance details

Defined in Box.Csv

Generic CsvConfig Source # 
Instance details

Defined in Box.Csv

Associated Types

type Rep CsvConfig :: Type -> Type #

type Rep CsvConfig Source # 
Instance details

Defined in Box.Csv

type Rep CsvConfig = D1 ('MetaData "CsvConfig" "Box.Csv" "box-csv-0.2.0-7GhFxJeLMLLAAK4EeBTXsI" 'False) (C1 ('MetaCons "CsvConfig" 'PrefixI 'True) (S1 ('MetaSel ('Just "file") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 FilePath) :*: (S1 ('MetaSel ('Just "fsep") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Char) :*: S1 ('MetaSel ('Just "header") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Header))))

defaultCsvConfig :: CsvConfig Source #

default csv file details

>>> defaultCsvConfig
CsvConfig {file = "./other/time_series_covid19_deaths_global_narrow.csv", fsep = ',', header = HasHXL}

test data from https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases

data Header Source #

Type of header rows. Note the modern propensity for multiple header rows.

Constructors

HasHeader 
HasHXL 
NoHeader 

Instances

Instances details
Eq Header Source # 
Instance details

Defined in Box.Csv

Methods

(==) :: Header -> Header -> Bool #

(/=) :: Header -> Header -> Bool #

Show Header Source # 
Instance details

Defined in Box.Csv

rowEmitter :: CsvConfig -> (Char -> Parser a) -> CoEmitter IO (Either Text a) Source #

A continuation emitter of parsed csv rows from a CsvConfig, returning the original text on failure >>> emit $| rowEmitter defaultCsvConfig fields Just (Right ["ProvinceState","CountryRegion",Lat,Long,Date,Value,"ISO 3166-1 Alpha 3-Codes","Region Code","Sub-region Code","Intermediate Region Coder"])

rowCommitter :: CsvConfig -> (a -> [Text]) -> CoCommitter IO a Source #

commits printed csv rows

>>> let testConfig = CsvConfig "./test/test.csv" ',' NoHeader
>>> let ctest = rowCommitter testConfig (fmap (Text.intercalate "," . fmap (Text.pack . show)))
>>> (\c -> commit c [[1..10::Int]]) <$|> ctest
True
>>> emit <$|> rowEmitter testConfig ints
Just (Right [1,2,3,4,5,6,7,8,9,10])

runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a] Source #

Run a parser across all lines of a file.

>>> r1 <- runCsv defaultCsvConfig fields
>>> length r1
42562
>>> length [x | (Left x) <- r1]
0
>>> take 2 $ drop 2 [x | (Right x) <- r1]
[["","Afghanistan","33.0","65.0","2020-06-29","733","AFG","142","34","\r"],["","Afghanistan","33.0","65.0","2020-06-28","721","AFG","142","34","\r"]]

parsers

sep :: Char -> Parser () Source #

Most parsing and building routines implicity assume a character acting as a separator of fields, and newlines separating rows.

>>> A.parse (sep ',') ",ok"
Done "ok" ()

field_ :: Char -> Parser Text Source #

an unquoted field Does not consume the separator token

>>> A.parse (field_ ',') "field,ok"
Done ",ok" "field"

field :: Char -> Parser Text Source #

an unquoted field Consume the separator token

>>> A.parse (field ',') "field,ok"
Done "ok" "field"

skipField_ :: Char -> Parser () Source #

skipping a field

>>> A.parse (skipField_ ',') "field,ok"
Done ",ok" ()

skipField :: Char -> Parser () Source #

skipping a field

>>> A.parse (skipField ',') "field,ok"
Done "ok" ()

int :: Parser Int Source #

int parser

>>> A.parse int "234,ok"
Done ",ok" 234

int' :: Char -> Parser Int Source #

int parser, consumes separator

>>> A.parse (int' ',') "234,ok"
Done "ok" 234

double :: Parser Double #

Parse a Double.

This parser accepts an optional leading sign character, followed by at most one decimal digit. The syntax is similar to that accepted by the read function, with the exception that a trailing '.' is consumed.

Examples

These examples use this helper:

r :: Parser a -> Text -> Result a
r p s = feed (parse p s) mempty

Examples with behaviour identical to read, if you feed an empty continuation to the first result:

r double "3"     == Done "" 3.0
r double "3.1"   == Done "" 3.1
r double "3e4"   == Done "" 30000.0
r double "3.1e4" == Done "" 31000.0
r double "3e"    == Done "e" 3.0

Examples with behaviour identical to read:

r double ".3"    == Fail ".3" _ _
r double "e3"    == Fail "e3" _ _

Example of difference from read:

r double "3.foo" == Done "foo" 3.0

This function does not accept string representations of "NaN" or "Infinity".

double' :: Char -> Parser Double Source #

double parser, consumes separator

>>> A.parse (double' ',') "234.000,ok"
Done "ok" 234.0

fields :: Char -> Parser [Text] Source #

Parser for a csv row of [Text].

>>> A.parseOnly (fields ',') "field1,field2\r"
Right ["field1","field2\r"]

ints :: Char -> Parser [Int] Source #

parser for a csv row of [Int]

>>> A.parseOnly (ints ',') "1,2,3"
Right [1,2,3]

doubles :: Char -> Parser [Double] Source #

parser for a csv row of [Double]

>>> A.parseOnly (doubles ',') "1,2,3"
Right [1.0,2.0,3.0]

day' :: Char -> Parser Day Source #

Day parser, consumes separator

>>> A.parse (day' ',') "2020-07-01,ok"
Done "ok" 2020-07-01

tod' :: Char -> Parser TimeOfDay Source #

TimeOfDay parser, consumes separator

>>> A.parse (tod' ',') "23:52:05.221109,ok"
Done "ok" 23:52:05.221109

localtime' :: Char -> Parser LocalTime Source #

TimeOfDay parser, consumes separator

>>> A.parse (localtime' ',') "Jun 24 8:24AM,ok"
Done "ok" 2020-06-24 08:24:00