Safe Haskell: None | Language: Haskell2010
A csv process based on attoparsec and the box library
Synopsis
- data CsvConfig = CsvConfig {}
- defaultCsvConfig :: CsvConfig
- file :: CsvConfig -> FilePath
- data Header
- rowEmitter :: CsvConfig -> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a))
- rowCommitter :: CsvConfig -> (a -> [Text]) -> Cont IO (Committer IO a)
- runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a]
- sep :: Char -> Parser ()
- field_ :: Char -> Parser Text
- field :: Char -> Parser Text
- skipField_ :: Char -> Parser ()
- skipField :: Char -> Parser ()
- int :: Parser Int
- int' :: Char -> Parser Int
- double :: Parser Double
- double' :: Char -> Parser Double
- fields :: Char -> Parser [Text]
- scis :: Char -> Parser [Scientific]
- ints :: Char -> Parser [Int]
- doubles :: Char -> Parser [Double]
- day' :: Char -> Parser Day
- tod' :: Char -> Parser TimeOfDay
- localtime' :: Char -> Parser LocalTime
Documentation
csv file configuration
Instances
Eq CsvConfig Source # | |
Show CsvConfig Source # | |
Generic CsvConfig Source # | |
type Rep CsvConfig Source # | |
Defined in Box.Csv type Rep CsvConfig = D1 ('MetaData "CsvConfig" "Box.Csv" "box-csv-0.1.0-8IGh6KYQcZl3OcqQldNQ5U" 'False) (C1 ('MetaCons "CsvConfig" 'PrefixI 'True) ((S1 ('MetaSel ('Just "name") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Text) :*: S1 ('MetaSel ('Just "suffix") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Text)) :*: (S1 ('MetaSel ('Just "dir") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Text) :*: (S1 ('MetaSel ('Just "fsep") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Char) :*: S1 ('MetaSel ('Just "header") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedStrict) (Rec0 Header))))) |
defaultCsvConfig :: CsvConfig Source #
default csv file details
>>>
defaultCsvConfig
CsvConfig {name = "time_series_covid19_deaths_global_narrow", suffix = ".csv", dir = "./other", fsep = ',', header = HasHXL}
test data from https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases
file :: CsvConfig -> FilePath Source #
filepath for the config.
>>>
file defaultCsvConfig
"./other/time_series_covid19_deaths_global_narrow.csv"
Type of header rows. Note the modern propensity for multiple header rows.
rowEmitter :: CsvConfig -> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a)) Source #
A continuation emitter of parsed csv rows from a CsvConfig, returning the original text on failure
>>>
rowEmitter defaultCsvConfig fields `with` emit
Just (Right ["ProvinceState","CountryRegion","Lat","Long","Date","Value","ISO 3166-1 Alpha 3-Codes","Region Code","Sub-region Code","Intermediate Region Code"])
rowCommitter :: CsvConfig -> (a -> [Text]) -> Cont IO (Committer IO a) Source #
commits printed csv rows
>>>
let testConfig = CsvConfig "test" ".csv" "./test" ',' NoHeader
>>>
let ctest = rowCommitter testConfig (fmap (Text.intercalate "," . fmap (Text.pack . show)))
FIXME: fails if used outside this project.
>>>
ctest `with` (\c -> commit c [[1..10::Int]])
True
>>>
rowEmitter testConfig ints `with` emit
Just (Right [1,2,3,4,5,6,7,8,9,10])
runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a] Source #
Run a parser across all lines of a file.
>>>
r1 <- runCsv defaultCsvConfig fields
>>>
length r1
42562
>>>
length [x | (Left x) <- r1]
0
>>>
take 2 $ drop 2 [x | (Right x) <- r1]
[["","Afghanistan","33.0","65.0","2020-06-29","733","AFG","142","34","\r"],["","Afghanistan","33.0","65.0","2020-06-28","721","AFG","142","34","\r"]]
parsers
sep :: Char -> Parser () Source #
Most parsing and building routines implicitly assume a character acting as a separator of fields, and newlines separating rows.
>>>
A.parse (sep ',') ",ok"
Done "ok" ()
field_ :: Char -> Parser Text Source #
an unquoted field Does not consume the separator token
>>>
A.parse (field_ ',') "field,ok"
Done ",ok" "field"
field :: Char -> Parser Text Source #
an unquoted field Consume the separator token
>>>
A.parse (field ',') "field,ok"
Done "ok" "field"
skipField_ :: Char -> Parser () Source #
skipping a field
>>>
A.parse (skipField_ ',') "field,ok"
Done ",ok" ()
skipField :: Char -> Parser () Source #
skipping a field
>>>
A.parse (skipField ',') "field,ok"
Done "ok" ()
int' :: Char -> Parser Int Source #
int parser, consumes separator
>>>
A.parse (int' ',') "234,ok"
Done "ok" 234
Parse a Double.
This parser accepts an optional leading sign character, followed by at least one decimal digit. The syntax is similar to that accepted by the read function, with the exception that a trailing '.' or 'e' not followed by a number is not consumed.
Examples
These examples use this helper:
r :: Parser a -> Text -> Result a
r p s = feed (parse p s) mempty
Examples with behaviour identical to read
, if you feed an empty
continuation to the first result:
r double "3" == Done "" 3.0 r double "3.1" == Done "" 3.1 r double "3e4" == Done "" 30000.0 r double "3.1e4" == Done "" 31000.0 r double "3e" == Done "e" 3.0
Examples with behaviour identical to read
:
r double ".3" == Fail ".3" _ _ r double "e3" == Fail "e3" _ _
Example of difference from read
:
r double "3.foo" == Done ".foo" 3.0
This function does not accept string representations of "NaN" or "Infinity".
double' :: Char -> Parser Double Source #
double parser, consumes separator
>>>
A.parse (double' ',') "234.000,ok"
Done "ok" 234.0
fields :: Char -> Parser [Text] Source #
Parser for a csv row of [Text]. TODO: deal with potential for an extra '\r' at the end of a row.
>>>
A.parseOnly (fields ',') "field1,field2\r"
Right ["field1","field2\r"]
scis :: Char -> Parser [Scientific] Source #
parser for a csv row of [Scientific]
>>>
A.parseOnly (scis ',') "1,2.2,3.3"
Right [1.0,2.2,3.3]
ints :: Char -> Parser [Int] Source #
parser for a csv row of [Int]
>>>
A.parseOnly (ints ',') "1,2,3"
Right [1,2,3]
doubles :: Char -> Parser [Double] Source #
parser for a csv row of [Double]
>>>
A.parseOnly (doubles ',') "1,2,3"
Right [1.0,2.0,3.0]
day' :: Char -> Parser Day Source #
Day parser, consumes separator
>>>
A.parse (day' ',') "2020-07-01,ok"
Done "ok" 2020-07-01