box-csv-0.0.3: See readme.md
Safe HaskellNone
LanguageHaskell2010

Box.Csv

Contents

Description

A csv process based on attoparsec and the box library

Synopsis

Documentation

data CsvConfig Source #

csv file configuration

Constructors

CsvConfig 

Fields

Instances

Instances details
Eq CsvConfig Source # 
Instance details

Defined in Box.Csv

Show CsvConfig Source # 
Instance details

Defined in Box.Csv

Generic CsvConfig Source # 
Instance details

Defined in Box.Csv

Associated Types

type Rep CsvConfig :: Type -> Type #

type Rep CsvConfig Source # 
Instance details

Defined in Box.Csv

defaultCsvConfig :: CsvConfig Source #

default csv file details

>>> defaultCsvConfig
CsvConfig {name = "time_series_covid19_deaths_global_narrow", suffix = ".csv", dir = "./other", fsep = ',', header = HasHXL}

test data from https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases

file :: CsvConfig -> FilePath Source #

filepath for the config.

>>> file defaultCsvConfig
"./other/time_series_covid19_deaths_global_narrow.csv"

data Header Source #

Type of header rows. Note the modern propensity for multiple header rows.

Constructors

HasHeader 
HasHXL 
NoHeader 

Instances

Instances details
Eq Header Source # 
Instance details

Defined in Box.Csv

Methods

(==) :: Header -> Header -> Bool #

(/=) :: Header -> Header -> Bool #

Show Header Source # 
Instance details

Defined in Box.Csv

rowEmitter :: CsvConfig -> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a)) Source #

A continuation emitter of parsed csv rows from a CsvConfig, returning the original text on failure >>> rowEmitter defaultCsvConfig fields with emit Just (Right ["ProvinceState","CountryRegion",Lat,Long,Date,Value,"ISO 3166-1 Alpha 3-Codes","Region Code","Sub-region Code","Intermediate Region Coder"])

rowCommitter :: CsvConfig -> (a -> [Text]) -> Cont IO (Committer IO a) Source #

commits printed csv rows

>>> let testConfig = CsvConfig "test" ".csv" "./test" ',' NoHeader
>>> let ctest = rowCommitter testConfig (fmap (Text.intercalate "," . fmap show))
>>> ctest `with` (\c -> commit c [[1..10::Int]])
True
>>> rowEmitter testConfig ints `with` emit
Just (Right [1,2,3,4,5,6,7,8,9,10])

runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a] Source #

Run a parser across all lines of a file.

>>> r1 <- runCsv defaultCsvConfig fields
>>> length r1
42562
>>> length [x | (Left x) <- r1]
0
>>> take 2 $ drop 2 [x | (Right x) <- r1]
[["","Afghanistan","33.0","65.0","2020-06-29","733","AFG","142","34","\r"],["","Afghanistan","33.0","65.0","2020-06-28","721","AFG","142","34","\r"]]

parsers

sep :: Char -> Parser () Source #

Most parsing and building routines implicity assume a character acting as a separator of fields, and newlines separating rows.

>>> A.parse (sep ',') ",ok"
Done "ok" ()

field_ :: Char -> Parser Text Source #

an unquoted field Does not consume the separator token

>>> A.parse (field_ ',') "field,ok"
Done ",ok" "field"

field :: Char -> Parser Text Source #

an unquoted field Consume the separator token

>>> A.parse (field ',') "field,ok"
Done "ok" "field"

skipField_ :: Char -> Parser () Source #

skipping a field

>>> A.parse (skipField_ ',') "field,ok"
Done ",ok" ()

skipField :: Char -> Parser () Source #

skipping a field

>>> A.parse (skipField ',') "field,ok"
Done "ok" ()

int :: Parser Int Source #

int parser

>>> A.parse int "234,ok"
Done ",ok" 234

int' :: Char -> Parser Int Source #

int parser, consumes separator

>>> A.parse (int' ',') "234,ok"
Done "ok" 234

double :: Parser Double #

Parse a rational number.

This parser accepts an optional leading sign character, followed by at least one decimal digit. The syntax similar to that accepted by the read function, with the exception that a trailing '.' or 'e' not followed by a number is not consumed.

Examples with behaviour identical to read, if you feed an empty continuation to the first result:

rational "3"     == Done 3.0 ""
rational "3.1"   == Done 3.1 ""
rational "3e4"   == Done 30000.0 ""
rational "3.1e4" == Done 31000.0, ""

Examples with behaviour identical to read:

rational ".3"    == Fail "input does not start with a digit"
rational "e3"    == Fail "input does not start with a digit"

Examples of differences from read:

rational "3.foo" == Done 3.0 ".foo"
rational "3e"    == Done 3.0 "e"

This function does not accept string representations of "NaN" or "Infinity".

double' :: Char -> Parser Double Source #

double parser, consumes separator

>>> A.parse (double' ',') "234.000,ok"
Done "ok" 234.0

fields :: Char -> Parser [Text] Source #

Parser for a csv row of [Text]. TODO: deal with potential for an extra 'r'

>>> A.parseOnly (fields ',') "field1,field2\r"
Right ["field1","field2\r"]

scis :: Char -> Parser [Scientific] Source #

parser for a csv row of [Scientific]

>>> A.parseOnly (scis ',') "1,2.2,3.3"
Right [1.0,2.2,3.3]

ints :: Char -> Parser [Int] Source #

parser for a csv row of [Int]

>>> A.parseOnly (ints ',') "1,2,3"
Right [1,2,3]

doubles :: Char -> Parser [Double] Source #

parser for a csv row of [Double]

>>> A.parseOnly (doubles ',') "1,2,3"
Right [1.0,2.0,3.0]

day' :: Char -> Parser Day Source #

Day parser, consumes separator

>>> A.parse (day' ',') "2020-07-01,ok"
Done "ok" 2020-07-01

tod' :: Char -> Parser TimeOfDay Source #

TimeOfDay parser, consumes separator

>>> A.parse (tod' ',') "23:52:05.221109,ok"
Done "ok" 23:52:05.221109

localtime' :: Char -> Parser LocalTime Source #

TimeOfDay parser, consumes separator

>>> A.parse (localtime' ',') "Jun 24 8:24AM,ok"
Done "ok" 2020-06-24 08:24:00