| Copyright | (c) Richard Warfield |
|---|---|
| License | BSD 3-clause |
| Maintainer | richard@litx.io |
| Safe Haskell | None |
| Language | Haskell2010 |
Streamly.Csv
Description
Stream CSV data in/out using Cassava. Adapted from streaming-cassava.
For efficiency, operates on streams of strict ByteString chunks
(i.e. IsStream t => t m ByteString) rather than directly on streams of Word8.
The chunkStream function is useful for generating an input stream from a
Handle.
Example usage:
import Streamly
import qualified Streamly.Prelude as S
import Streamly.Csv (decode, encode, chunkStream)
import System.IO
import qualified Data.Csv as Csv
import qualified Data.ByteString as BS
import Data.Vector (Vector)
do
h <- openFile "testfile.csv" ReadMode
let chunks = chunkStream h (64*1024)
recs = decode Csv.HasHeader chunks :: SerialT IO (Vector BS.ByteString)
withFile "dest.csv" WriteMode $ \ho ->
S.mapM_ (BS.hPut ho) $ encode Nothing recsSynopsis
- decode :: (IsStream t, MonadAsync m, FromRecord a) => HasHeader -> t m ByteString -> t m a
- decodeWith :: (IsStream t, MonadAsync m, FromRecord a) => DecodeOptions -> HasHeader -> t m ByteString -> t m a
- decodeWithErrors :: (IsStream t, Monad m, FromRecord a, MonadThrow m) => DecodeOptions -> HasHeader -> t m ByteString -> t m (Either CsvParseException a)
- newtype CsvParseException = CsvParseException String
- chunkStream :: (IsStream t, MonadAsync m) => Handle -> Int -> t m ByteString
- decodeByName :: (MonadAsync m, FromNamedRecord a) => SerialT m ByteString -> SerialT m a
- decodeByNameWith :: (MonadAsync m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m a
- decodeByNameWithErrors :: forall m a. (Monad m, MonadThrow m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m (Either CsvParseException a)
- encode :: (IsStream t, ToRecord a, Monad m) => Maybe Header -> t m a -> t m ByteString
- encodeDefault :: forall a t m. (IsStream t, ToRecord a, DefaultOrdered a, Monad m) => t m a -> t m ByteString
- encodeWith :: (IsStream t, ToRecord a, Monad m) => EncodeOptions -> Maybe Header -> t m a -> t m ByteString
- encodeByName :: (IsStream t, ToNamedRecord a, Monad m) => Header -> t m a -> t m ByteString
- encodeByNameDefault :: forall a t m. (IsStream t, DefaultOrdered a, ToNamedRecord a, Monad m) => t m a -> t m ByteString
- encodeByNameWith :: (IsStream t, ToNamedRecord a, Monad m) => EncodeOptions -> Header -> t m a -> t m ByteString
- class FromRecord a where
- parseRecord :: Record -> Parser a
- class FromNamedRecord a where
- parseNamedRecord :: NamedRecord -> Parser a
- class ToRecord a where
- class ToNamedRecord a where
- toNamedRecord :: a -> NamedRecord
- class DefaultOrdered a where
- headerOrder :: a -> Header
- data HasHeader
- type Header = Vector Name
- header :: [ByteString] -> Header
- type Name = ByteString
- data DecodeOptions = DecodeOptions {
- decDelimiter :: !Word8
- defaultDecodeOptions :: DecodeOptions
- data EncodeOptions = EncodeOptions {
- encDelimiter :: !Word8
- encUseCrLf :: !Bool
- encIncludeHeader :: !Bool
- encQuoting :: !Quoting
- defaultEncodeOptions :: EncodeOptions
Decoding
decode :: (IsStream t, MonadAsync m, FromRecord a) => HasHeader -> t m ByteString -> t m a Source #
Use defaultOptions for decoding the provided CSV.
decodeWith :: (IsStream t, MonadAsync m, FromRecord a) => DecodeOptions -> HasHeader -> t m ByteString -> t m a Source #
Return back a stream of values from the provided CSV, stopping at the first error.
If you wish to instead ignore errors, consider using
decodeWithErrors with mapMaybe
Any remaining input is discarded.
decodeWithErrors :: (IsStream t, Monad m, FromRecord a, MonadThrow m) => DecodeOptions -> HasHeader -> t m ByteString -> t m (Either CsvParseException a) Source #
Return back a stream with an attempt at type conversion, and either the previous result or any overall parsing errors with the remainder of the input.
newtype CsvParseException Source #
Constructors
| CsvParseException String |
Instances
| Eq CsvParseException Source # | |
Defined in Streamly.Csv Methods (==) :: CsvParseException -> CsvParseException -> Bool # (/=) :: CsvParseException -> CsvParseException -> Bool # | |
| Show CsvParseException Source # | |
Defined in Streamly.Csv Methods showsPrec :: Int -> CsvParseException -> ShowS # show :: CsvParseException -> String # showList :: [CsvParseException] -> ShowS # | |
| IsString CsvParseException Source # | |
Defined in Streamly.Csv Methods fromString :: String -> CsvParseException # | |
| Exception CsvParseException Source # | |
Defined in Streamly.Csv Methods toException :: CsvParseException -> SomeException # | |
chunkStream :: (IsStream t, MonadAsync m) => Handle -> Int -> t m ByteString Source #
Named decoding
decodeByName :: (MonadAsync m, FromNamedRecord a) => SerialT m ByteString -> SerialT m a Source #
Use defaultOptions for decoding the provided CSV.
decodeByNameWith :: (MonadAsync m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m a Source #
Return back a stream of values from the provided CSV, stopping at the first error.
A header is required to determine the order of columns, but then discarded.
If you wish to instead ignore errors, consider using
decodeByNameWithErrors with mapMaybe
Any remaining input is discarded.
decodeByNameWithErrors :: forall m a. (Monad m, MonadThrow m, FromNamedRecord a) => DecodeOptions -> SerialT m ByteString -> SerialT m (Either CsvParseException a) Source #
Return back a stream with an attempt at type conversion, but where the order of columns doesn't have to match the order of fields of your actual type.
This requires/assumes a header in the CSV stream, which is discarded after parsing.
Encoding
encodeDefault :: forall a t m. (IsStream t, ToRecord a, DefaultOrdered a, Monad m) => t m a -> t m ByteString Source #
Encode a stream of values with the default options and a derived header prefixed.
encodeWith :: (IsStream t, ToRecord a, Monad m) => EncodeOptions -> Maybe Header -> t m a -> t m ByteString Source #
Encode a stream of values with the provided options.
Optionally prefix the stream with headers (the header function
may be useful).
Named encoding
encodeByName :: (IsStream t, ToNamedRecord a, Monad m) => Header -> t m a -> t m ByteString Source #
Select the columns that you wish to encode from your data structure using default options (which currently includes printing the header).
encodeByNameDefault :: forall a t m. (IsStream t, DefaultOrdered a, ToNamedRecord a, Monad m) => t m a -> t m ByteString Source #
Use the default ordering to encode all fields/columns.
encodeByNameWith :: (IsStream t, ToNamedRecord a, Monad m) => EncodeOptions -> Header -> t m a -> t m ByteString Source #
Select the columns that you wish to encode from your data structure.
Header printing respects encIncludeheader.
Re-exports
class FromRecord a where #
A type that can be converted from a single CSV record, with the possibility of failure.
When writing an instance, use empty, mzero, or fail to make a
conversion fail, e.g. if a Record has the wrong number of
columns.
Given this example data:
John,56 Jane,55
here's an example type and instance:
data Person = Person { name :: !Text, age :: !Int }
instance FromRecord Person where
parseRecord v
| length v == 2 = Person <$>
v .! 0 <*>
v .! 1
| otherwise = mzeroMinimal complete definition
Nothing
Methods
parseRecord :: Record -> Parser a #
Instances
class FromNamedRecord a where #
A type that can be converted from a single CSV record, with the possibility of failure.
When writing an instance, use empty, mzero, or fail to make a
conversion fail, e.g. if a Record has the wrong number of
columns.
Given this example data:
name,age John,56 Jane,55
here's an example type and instance:
{-# LANGUAGE OverloadedStrings #-}
data Person = Person { name :: !Text, age :: !Int }
instance FromNamedRecord Person where
parseNamedRecord m = Person <$>
m .: "name" <*>
m .: "age"Note the use of the OverloadedStrings language extension which
enables ByteString values to be written as string literals.
Minimal complete definition
Nothing
Methods
parseNamedRecord :: NamedRecord -> Parser a #
Instances
| (FromField a, FromField b, Ord a) => FromNamedRecord (Map a b) | |
Defined in Data.Csv.Conversion Methods parseNamedRecord :: NamedRecord -> Parser (Map a b) # | |
| (Eq a, FromField a, FromField b, Hashable a) => FromNamedRecord (HashMap a b) | |
Defined in Data.Csv.Conversion Methods parseNamedRecord :: NamedRecord -> Parser (HashMap a b) # | |
A type that can be converted to a single CSV record.
An example type and instance:
data Person = Person { name :: !Text, age :: !Int }
instance ToRecord Person where
toRecord (Person name age) = record [
toField name, toField age]Outputs data on this form:
John,56 Jane,55
Minimal complete definition
Nothing
Instances
class ToNamedRecord a where #
A type that can be converted to a single CSV record.
An example type and instance:
data Person = Person { name :: !Text, age :: !Int }
instance ToNamedRecord Person where
toNamedRecord (Person name age) = namedRecord [
"name" .= name, "age" .= age]Minimal complete definition
Nothing
Instances
| (ToField a, ToField b, Ord a) => ToNamedRecord (Map a b) | |
Defined in Data.Csv.Conversion Methods toNamedRecord :: Map a b -> NamedRecord # | |
| (Eq a, ToField a, ToField b, Hashable a) => ToNamedRecord (HashMap a b) | |
Defined in Data.Csv.Conversion Methods toNamedRecord :: HashMap a b -> NamedRecord # | |
class DefaultOrdered a where #
A type that has a default field order when converted to CSV. This
class lets you specify how to get the headers to use for a record
type that's an instance of ToNamedRecord.
To derive an instance, the type is required to only have one constructor and that constructor must have named fields (also known as selectors) for all fields.
Right: data Foo = Foo { foo :: !Int }
Wrong: data Bar = Bar Int
If you try to derive an instance using GHC generics and your type doesn't have named fields, you will get an error along the lines of:
<interactive>:9:10:
No instance for (DefaultOrdered (M1 S NoSelector (K1 R Char) ()))
arising from a use of ‘Data.Csv.Conversion.$gdmheader’
In the expression: Data.Csv.Conversion.$gdmheader
In an equation for ‘header’:
header = Data.Csv.Conversion.$gdmheader
In the instance declaration for ‘DefaultOrdered Foo’Minimal complete definition
Nothing
Methods
headerOrder :: a -> Header #
The header order for this record. Should include the names
used in the NamedRecord returned by ToNamedRecord. Pass
undefined as the argument, together with a type annotation
e.g. .headerOrder (undefined :: MyRecord)
Is the CSV data preceded by a header?
The header corresponds to the first line a CSV file. Not all CSV files have a header.
header :: [ByteString] -> Header #
Construct a header from a list of ByteStrings.
type Name = ByteString #
A header has one or more names, describing the data in the column following the name.
data DecodeOptions #
Options that controls how data is decoded. These options can be used to e.g. decode tab-separated data instead of comma-separated data.
To avoid having your program stop compiling when new fields are
added to DecodeOptions, create option records by overriding
values in defaultDecodeOptions. Example:
myOptions = defaultDecodeOptions {
decDelimiter = fromIntegral (ord '\t')
}Constructors
| DecodeOptions | |
Fields
| |
Instances
| Eq DecodeOptions | |
Defined in Data.Csv.Parser Methods (==) :: DecodeOptions -> DecodeOptions -> Bool # (/=) :: DecodeOptions -> DecodeOptions -> Bool # | |
| Show DecodeOptions | |
Defined in Data.Csv.Parser Methods showsPrec :: Int -> DecodeOptions -> ShowS # show :: DecodeOptions -> String # showList :: [DecodeOptions] -> ShowS # | |
defaultDecodeOptions :: DecodeOptions #
Decoding options for parsing CSV files.
data EncodeOptions #
Options that controls how data is encoded. These options can be used to e.g. encode data in a tab-separated format instead of in a comma-separated format.
To avoid having your program stop compiling when new fields are
added to EncodeOptions, create option records by overriding
values in defaultEncodeOptions. Example:
myOptions = defaultEncodeOptions {
encDelimiter = fromIntegral (ord '\t')
}N.B. The encDelimiter must not be the quote character (i.e.
") or one of the record separator characters (i.e. \n or
\r).
Constructors
| EncodeOptions | |
Fields
| |
Instances
| Eq EncodeOptions | |
Defined in Data.Csv.Encoding Methods (==) :: EncodeOptions -> EncodeOptions -> Bool # (/=) :: EncodeOptions -> EncodeOptions -> Bool # | |
| Show EncodeOptions | |
Defined in Data.Csv.Encoding Methods showsPrec :: Int -> EncodeOptions -> ShowS # show :: EncodeOptions -> String # showList :: [EncodeOptions] -> ShowS # | |
defaultEncodeOptions :: EncodeOptions #
Encoding options for CSV files.