Safe Haskell | None |
---|---|
Language | Haskell2010 |
A csv process based on attoparsec and the box library
Synopsis
- data CsvConfig = CsvConfig {}
- defaultCsvConfig :: CsvConfig
- file :: CsvConfig -> FilePath
- data Header
- rowEmitter :: CsvConfig -> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a))
- rowCommitter :: CsvConfig -> (a -> [Text]) -> Cont IO (Committer IO a)
- runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a]
- sep :: Char -> Parser ()
- field_ :: Char -> Parser Text
- field :: Char -> Parser Text
- skipField_ :: Char -> Parser ()
- skipField :: Char -> Parser ()
- int :: Parser Int
- int' :: Char -> Parser Int
- double :: Parser Double
- double' :: Char -> Parser Double
- fields :: Char -> Parser [Text]
- scis :: Char -> Parser [Scientific]
- ints :: Char -> Parser [Int]
- doubles :: Char -> Parser [Double]
- day' :: Char -> Parser Day
- tod' :: Char -> Parser TimeOfDay
- localtime' :: Char -> Parser LocalTime
Documentation
csv file configuration
Instances
Eq CsvConfig Source # | |
Show CsvConfig Source # | |
Generic CsvConfig Source # | |
type Rep CsvConfig Source # | |
Defined in Box.Csv type Rep CsvConfig = D1 ('MetaData "CsvConfig" "Box.Csv" "box-csv-0.0.3-L6hWntZV0UGFOfOxcCfQkQ" 'False) (C1 ('MetaCons "CsvConfig" 'PrefixI 'True) ((S1 ('MetaSel ('Just "name") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Text) :*: S1 ('MetaSel ('Just "suffix") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Text)) :*: (S1 ('MetaSel ('Just "dir") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Text) :*: (S1 ('MetaSel ('Just "fsep") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Char) :*: S1 ('MetaSel ('Just "header") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 Header))))) |
defaultCsvConfig :: CsvConfig Source #
default csv file details
>>>
defaultCsvConfig
CsvConfig {name = "time_series_covid19_deaths_global_narrow", suffix = ".csv", dir = "./other", fsep = ',', header = HasHXL}
test data from https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases
file :: CsvConfig -> FilePath Source #
filepath for the config.
>>>
file defaultCsvConfig
"./other/time_series_covid19_deaths_global_narrow.csv"
Type of header rows. Note the modern propensity for multiple header rows.
rowEmitter :: CsvConfig -> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a)) Source #
A continuation emitter of parsed csv rows from a CsvConfig, returning the original text on failure
>>> rowEmitter defaultCsvConfig fields with
emit
Just (Right ["ProvinceState","CountryRegion",Lat,Long,Date,Value,"ISO 3166-1 Alpha 3-Codes","Region Code","Sub-region Code","Intermediate Region Coder"])
rowCommitter :: CsvConfig -> (a -> [Text]) -> Cont IO (Committer IO a) Source #
commits printed csv rows
>>>
let testConfig = CsvConfig "test" ".csv" "./test" ',' NoHeader
>>>
let ctest = rowCommitter testConfig (fmap (Text.intercalate "," . fmap show))
>>>
ctest `with` (\c -> commit c [[1..10::Int]])
True
>>>
rowEmitter testConfig ints `with` emit
Just (Right [1,2,3,4,5,6,7,8,9,10])
runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a] Source #
Run a parser across all lines of a file.
>>>
r1 <- runCsv defaultCsvConfig fields
>>>
length r1
42562
>>>
length [x | (Left x) <- r1]
0
>>>
take 2 $ drop 2 [x | (Right x) <- r1]
[["","Afghanistan","33.0","65.0","2020-06-29","733","AFG","142","34","\r"],["","Afghanistan","33.0","65.0","2020-06-28","721","AFG","142","34","\r"]]
parsers
sep :: Char -> Parser () Source #
Most parsing and building routines implicity assume a character acting as a separator of fields, and newlines separating rows.
>>>
A.parse (sep ',') ",ok"
Done "ok" ()
field_ :: Char -> Parser Text Source #
an unquoted field Does not consume the separator token
>>>
A.parse (field_ ',') "field,ok"
Done ",ok" "field"
field :: Char -> Parser Text Source #
an unquoted field Consume the separator token
>>>
A.parse (field ',') "field,ok"
Done "ok" "field"
skipField_ :: Char -> Parser () Source #
skipping a field
>>>
A.parse (skipField_ ',') "field,ok"
Done ",ok" ()
skipField :: Char -> Parser () Source #
skipping a field
>>>
A.parse (skipField ',') "field,ok"
Done "ok" ()
int' :: Char -> Parser Int Source #
int parser, consumes separator
>>>
A.parse (int' ',') "234,ok"
Done "ok" 234
Parse a rational number.
This parser accepts an optional leading sign character, followed by
at least one decimal digit. The syntax similar to that accepted by
the read
function, with the exception that a trailing '.'
or
'e'
not followed by a number is not consumed.
Examples with behaviour identical to read
, if you feed an empty
continuation to the first result:
rational "3" == Done 3.0 "" rational "3.1" == Done 3.1 "" rational "3e4" == Done 30000.0 "" rational "3.1e4" == Done 31000.0, ""
Examples with behaviour identical to read
:
rational ".3" == Fail "input does not start with a digit" rational "e3" == Fail "input does not start with a digit"
Examples of differences from read
:
rational "3.foo" == Done 3.0 ".foo" rational "3e" == Done 3.0 "e"
This function does not accept string representations of "NaN" or "Infinity".
double' :: Char -> Parser Double Source #
double parser, consumes separator
>>>
A.parse (double' ',') "234.000,ok"
Done "ok" 234.0
fields :: Char -> Parser [Text] Source #
Parser for a csv row of [Text]. TODO: deal with potential for an extra 'r'
>>>
A.parseOnly (fields ',') "field1,field2\r"
Right ["field1","field2\r"]
scis :: Char -> Parser [Scientific] Source #
parser for a csv row of [Scientific]
>>>
A.parseOnly (scis ',') "1,2.2,3.3"
Right [1.0,2.2,3.3]
ints :: Char -> Parser [Int] Source #
parser for a csv row of [Int]
>>>
A.parseOnly (ints ',') "1,2,3"
Right [1,2,3]
doubles :: Char -> Parser [Double] Source #
parser for a csv row of [Double]
>>>
A.parseOnly (doubles ',') "1,2,3"
Right [1.0,2.0,3.0]
day' :: Char -> Parser Day Source #
Day parser, consumes separator
>>>
A.parse (day' ',') "2020-07-01,ok"
Done "ok" 2020-07-01