{-# LANGUAGE AllowAmbiguousTypes #-}
{-# LANGUAGE CPP #-}

-- | Stream, parse, and validate CSVs
--
-- A minor extension of [cassava](https://hackage.haskell.org/package/cassava).
-- Using `MonadValidate` and `Conduit`.
module Freckle.App.Csv
  ( csvWithValidationSink
  , csvWithParserAndValidationSink

    -- * Conduit Primitives
  , runCsvConduit
  , decodeCsv

    -- * Header Validation
  , ValidateHeader
  , validateHeader
  , hasHeader
  , defaultValidateOrderedHeader

    -- * Exceptions
  , CsvException (..)

    -- * Options
  , defaultOptions
  ) where

import Freckle.App.Prelude

import Conduit
import Control.Monad (foldM)
import Control.Monad.Validate
  ( MonadValidate (..)
  , Validate
  , ValidateT
  , refute
  , runValidate
  , runValidateT
  )
import Data.Aeson (KeyValue (..), ToJSON (..), object, pairs, (.=))
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as BS8
import qualified Data.Conduit.Combinators as Conduit
import qualified Data.Conduit.Text as Conduit
import Data.Csv
  ( DefaultOrdered
  , FromNamedRecord (..)
  , Header
  , Name
  , NamedRecord
  , Parser
  , defaultDecodeOptions
  , defaultOptions
  , headerOrder
  )
import qualified Data.Csv.Incremental as CsvI
import Data.Functor.Bind (Bind)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.List.NonEmpty as NE
import Data.Proxy (Proxy (Proxy))
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Sequence.NonEmpty (NESeq)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Vector as V

-- | Treat CSV header line as 1
--
-- CSVs can break rows over lines, but we don't currently handle that.
headerLineNumber :: Int
headerLineNumber :: Int
headerLineNumber = Int
1

class ValidateHeader a where
  validateHeader
    :: (Bind m, Monad m) => proxy a -> Header -> ValidateT (NESeq String) m ()

hasHeader :: Monad m => Header -> Name -> ValidateT (NESeq String) m ()
hasHeader :: forall (m :: * -> *).
Monad m =>
Header -> ByteString -> ValidateT (NESeq String) m ()
hasHeader Header
h ByteString
name
  | ByteString
name ByteString -> Header -> Bool
forall a. Eq a => a -> Vector a -> Bool
`V.elem` Header
h = () -> ValidateT (NESeq String) m ()
forall a. a -> ValidateT (NESeq String) m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
  | Bool
otherwise = NESeq String -> ValidateT (NESeq String) m ()
forall a. NESeq String -> ValidateT (NESeq String) m a
forall e (m :: * -> *) a. MonadValidate e m => e -> m a
refute (NESeq String -> ValidateT (NESeq String) m ())
-> (String -> NESeq String)
-> String
-> ValidateT (NESeq String) m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> NESeq String
forall a. a -> NESeq a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String -> ValidateT (NESeq String) m ())
-> String -> ValidateT (NESeq String) m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> String
BS8.unpack ByteString
name

defaultValidateOrderedHeader
  :: forall a proxy m
   . (DefaultOrdered a, Monad m)
  => proxy a
  -> Header
  -> ValidateT (NESeq String) m ()
defaultValidateOrderedHeader :: forall a (proxy :: * -> *) (m :: * -> *).
(DefaultOrdered a, Monad m) =>
proxy a -> Header -> ValidateT (NESeq String) m ()
defaultValidateOrderedHeader proxy a
_ Header
h =
  (ByteString -> ValidateT (NESeq String) m ())
-> Header -> ValidateT (NESeq String) m ()
forall (t :: * -> *) (f :: * -> *) a b.
(Foldable t, Applicative f) =>
(a -> f b) -> t a -> f ()
traverse_ (Header
h Header -> ByteString -> ValidateT (NESeq String) m ()
forall (m :: * -> *).
Monad m =>
Header -> ByteString -> ValidateT (NESeq String) m ()
`hasHeader`) (Header -> ValidateT (NESeq String) m ())
-> Header -> ValidateT (NESeq String) m ()
forall a b. (a -> b) -> a -> b
$ a -> Header
forall a. DefaultOrdered a => a -> Header
headerOrder (String -> a
forall a. HasCallStack => String -> a
error String
"old school haskell" :: a)

-- | Stream parse a CSV
--
-- - Expects UTF-8
-- - Provides incremental validation
csvWithValidationSink
  :: forall a b err m
   . ( MonadThrow m
     , MonadUnliftIO m
     , PrimMonad m
     , ValidateHeader a
     , FromNamedRecord a
     )
  => ConduitT () ByteString (ResourceT m) ()
  -- ^ CSV as a byte stream
  -> (Vector a -> Validate (NonEmpty (CsvException err)) (Vector b))
  -- ^ Validation to apply to resulting rows
  -> m (Validate (NonEmpty (CsvException err)) (Vector b))
csvWithValidationSink :: forall a b err (m :: * -> *).
(MonadThrow m, MonadUnliftIO m, PrimMonad m, ValidateHeader a,
 FromNamedRecord a) =>
ConduitT () ByteString (ResourceT m) ()
-> (Vector a -> Validate (NonEmpty (CsvException err)) (Vector b))
-> m (Validate (NonEmpty (CsvException err)) (Vector b))
csvWithValidationSink =
  (Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a)
-> ConduitT () ByteString (ResourceT m) ()
-> (Vector a -> Validate (NonEmpty (CsvException err)) (Vector b))
-> m (Validate (NonEmpty (CsvException err)) (Vector b))
forall a b err (m :: * -> *).
(MonadThrow m, MonadUnliftIO m, PrimMonad m) =>
(Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a)
-> ConduitT () ByteString (ResourceT m) ()
-> (Vector a -> Validate (NonEmpty (CsvException err)) (Vector b))
-> m (Validate (NonEmpty (CsvException err)) (Vector b))
csvWithParserAndValidationSink (Proxy a -> Header -> Validate (NESeq String) ()
forall a (m :: * -> *) (proxy :: * -> *).
(ValidateHeader a, Bind m, Monad m) =>
proxy a -> Header -> ValidateT (NESeq String) m ()
forall (m :: * -> *) (proxy :: * -> *).
(Bind m, Monad m) =>
proxy a -> Header -> ValidateT (NESeq String) m ()
validateHeader (forall t. Proxy t
forall {k} (t :: k). Proxy t
Proxy @a)) NamedRecord -> Parser a
forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord

-- | Stream parse a CSV with a custom parser
--
-- - Expects UTF-8
-- - Provides incremental validation
csvWithParserAndValidationSink
  :: forall a b err m
   . (MonadThrow m, MonadUnliftIO m, PrimMonad m)
  => (Header -> Validate (NESeq String) ())
  -> (NamedRecord -> Parser a)
  -- ^ Custom record parser
  -> ConduitT () ByteString (ResourceT m) ()
  -- ^ CSV as a byte stream
  -> (Vector a -> Validate (NonEmpty (CsvException err)) (Vector b))
  -- ^ Validation to apply to resulting rows
  -> m (Validate (NonEmpty (CsvException err)) (Vector b))
csvWithParserAndValidationSink :: forall a b err (m :: * -> *).
(MonadThrow m, MonadUnliftIO m, PrimMonad m) =>
(Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a)
-> ConduitT () ByteString (ResourceT m) ()
-> (Vector a -> Validate (NonEmpty (CsvException err)) (Vector b))
-> m (Validate (NonEmpty (CsvException err)) (Vector b))
csvWithParserAndValidationSink Header -> Validate (NESeq String) ()
headerValidator NamedRecord -> Parser a
p ConduitT () ByteString (ResourceT m) ()
source Vector a -> Validate (NonEmpty (CsvException err)) (Vector b)
validation = do
  Either (Seq (CsvException err)) (Vector a)
validatedCsv <-
    ConduitT
  ()
  Void
  (ValidateT (Seq (CsvException err)) (ResourceT m))
  (Vector a)
-> m (Either (Seq (CsvException err)) (Vector a))
forall r (m :: * -> *) err.
MonadUnliftIO m =>
ConduitT
  () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
-> m (Either (Seq (CsvException err)) r)
runCsvConduit (ConduitT
   ()
   Void
   (ValidateT (Seq (CsvException err)) (ResourceT m))
   (Vector a)
 -> m (Either (Seq (CsvException err)) (Vector a)))
-> ConduitT
     ()
     Void
     (ValidateT (Seq (CsvException err)) (ResourceT m))
     (Vector a)
-> m (Either (Seq (CsvException err)) (Vector a))
forall a b. (a -> b) -> a -> b
$
      (forall a.
 ResourceT m a
 -> ValidateT (Seq (CsvException err)) (ResourceT m) a)
-> ConduitT () ByteString (ResourceT m) ()
-> ConduitT
     () ByteString (ValidateT (Seq (CsvException err)) (ResourceT m)) ()
forall (m :: * -> *) (n :: * -> *) i o r.
Monad m =>
(forall a. m a -> n a) -> ConduitT i o m r -> ConduitT i o n r
transPipe ResourceT m a -> ValidateT (Seq (CsvException err)) (ResourceT m) a
forall a.
ResourceT m a -> ValidateT (Seq (CsvException err)) (ResourceT m) a
forall (m :: * -> *) a.
Monad m =>
m a -> ValidateT (Seq (CsvException err)) m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ConduitT () ByteString (ResourceT m) ()
source
        ConduitT
  () ByteString (ValidateT (Seq (CsvException err)) (ResourceT m)) ()
-> ConduitT
     ByteString
     Void
     (ValidateT (Seq (CsvException err)) (ResourceT m))
     (Vector a)
-> ConduitT
     ()
     Void
     (ValidateT (Seq (CsvException err)) (ResourceT m))
     (Vector a)
forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| (Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a)
-> ConduitT
     ByteString a (ValidateT (Seq (CsvException err)) (ResourceT m)) ()
forall a (m :: * -> *) err.
(MonadThrow m, MonadValidate (Seq (CsvException err)) m) =>
(Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a) -> ConduitT ByteString a m ()
decodeCsvWithP Header -> Validate (NESeq String) ()
headerValidator NamedRecord -> Parser a
p
        ConduitT
  ByteString a (ValidateT (Seq (CsvException err)) (ResourceT m)) ()
-> ConduitT
     a
     Void
     (ValidateT (Seq (CsvException err)) (ResourceT m))
     (Vector a)
-> ConduitT
     ByteString
     Void
     (ValidateT (Seq (CsvException err)) (ResourceT m))
     (Vector a)
forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| (forall a.
 ResourceT m a
 -> ValidateT (Seq (CsvException err)) (ResourceT m) a)
-> ConduitT a Void (ResourceT m) (Vector a)
-> ConduitT
     a
     Void
     (ValidateT (Seq (CsvException err)) (ResourceT m))
     (Vector a)
forall (m :: * -> *) (n :: * -> *) i o r.
Monad m =>
(forall a. m a -> n a) -> ConduitT i o m r -> ConduitT i o n r
transPipe ResourceT m a -> ValidateT (Seq (CsvException err)) (ResourceT m) a
forall a.
ResourceT m a -> ValidateT (Seq (CsvException err)) (ResourceT m) a
forall (m :: * -> *) a.
Monad m =>
m a -> ValidateT (Seq (CsvException err)) m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift ConduitT a Void (ResourceT m) (Vector a)
forall (v :: * -> *) a (m :: * -> *) o.
(Vector v a, PrimMonad m) =>
ConduitT a o m (v a)
sinkVector

  Validate (NonEmpty (CsvException err)) (Vector b)
-> m (Validate (NonEmpty (CsvException err)) (Vector b))
forall a. a -> m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Validate (NonEmpty (CsvException err)) (Vector b)
 -> m (Validate (NonEmpty (CsvException err)) (Vector b)))
-> Validate (NonEmpty (CsvException err)) (Vector b)
-> m (Validate (NonEmpty (CsvException err)) (Vector b))
forall a b. (a -> b) -> a -> b
$ case Either (Seq (CsvException err)) (Vector a)
validatedCsv of
    Left Seq (CsvException err)
errs -> NonEmpty (CsvException err)
-> Validate (NonEmpty (CsvException err)) (Vector b)
forall a.
NonEmpty (CsvException err)
-> ValidateT (NonEmpty (CsvException err)) Identity a
forall e (m :: * -> *) a. MonadValidate e m => e -> m a
refute (NonEmpty (CsvException err)
 -> Validate (NonEmpty (CsvException err)) (Vector b))
-> NonEmpty (CsvException err)
-> Validate (NonEmpty (CsvException err)) (Vector b)
forall a b. (a -> b) -> a -> b
$ [CsvException err] -> NonEmpty (CsvException err)
forall a. HasCallStack => [a] -> NonEmpty a
NE.fromList ([CsvException err] -> NonEmpty (CsvException err))
-> [CsvException err] -> NonEmpty (CsvException err)
forall a b. (a -> b) -> a -> b
$ Seq (CsvException err) -> [CsvException err]
forall a. Seq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList Seq (CsvException err)
errs
    Right Vector a
rows -> Vector a -> Validate (NonEmpty (CsvException err)) (Vector b)
validation Vector a
rows

-- | Run a CSV conduit handling invalid UTF8
runCsvConduit
  :: forall r m err
   . MonadUnliftIO m
  => ConduitT () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
  -> m (Either (Seq (CsvException err)) r)
runCsvConduit :: forall r (m :: * -> *) err.
MonadUnliftIO m =>
ConduitT
  () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
-> m (Either (Seq (CsvException err)) r)
runCsvConduit = (m (Either (Seq (CsvException err)) r)
 -> (TextException -> m (Either (Seq (CsvException err)) r))
 -> m (Either (Seq (CsvException err)) r))
-> (TextException -> m (Either (Seq (CsvException err)) r))
-> m (Either (Seq (CsvException err)) r)
-> m (Either (Seq (CsvException err)) r)
forall a b c. (a -> b -> c) -> b -> a -> c
flip m (Either (Seq (CsvException err)) r)
-> (TextException -> m (Either (Seq (CsvException err)) r))
-> m (Either (Seq (CsvException err)) r)
forall e (m :: * -> *) a.
(Exception e, MonadUnliftIO m, HasCallStack) =>
m a -> (e -> m a) -> m a
catch TextException -> m (Either (Seq (CsvException err)) r)
forall {f :: * -> *} {f :: * -> *} {a} {b}.
(Applicative f, Applicative f) =>
TextException -> f (Either (f (CsvException a)) b)
nonUtf8 (m (Either (Seq (CsvException err)) r)
 -> m (Either (Seq (CsvException err)) r))
-> (ConduitT
      () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
    -> m (Either (Seq (CsvException err)) r))
-> ConduitT
     () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
-> m (Either (Seq (CsvException err)) r)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ResourceT m (Either (Seq (CsvException err)) r)
-> m (Either (Seq (CsvException err)) r)
forall (m :: * -> *) a. MonadUnliftIO m => ResourceT m a -> m a
runResourceT (ResourceT m (Either (Seq (CsvException err)) r)
 -> m (Either (Seq (CsvException err)) r))
-> (ConduitT
      () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
    -> ResourceT m (Either (Seq (CsvException err)) r))
-> ConduitT
     () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
-> m (Either (Seq (CsvException err)) r)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ValidateT (Seq (CsvException err)) (ResourceT m) r
-> ResourceT m (Either (Seq (CsvException err)) r)
forall e (m :: * -> *) a.
Functor m =>
ValidateT e m a -> m (Either e a)
runValidateT (ValidateT (Seq (CsvException err)) (ResourceT m) r
 -> ResourceT m (Either (Seq (CsvException err)) r))
-> (ConduitT
      () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
    -> ValidateT (Seq (CsvException err)) (ResourceT m) r)
-> ConduitT
     () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
-> ResourceT m (Either (Seq (CsvException err)) r)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ConduitT
  () Void (ValidateT (Seq (CsvException err)) (ResourceT m)) r
-> ValidateT (Seq (CsvException err)) (ResourceT m) r
forall (m :: * -> *) r. Monad m => ConduitT () Void m r -> m r
runConduit
 where
  nonUtf8 :: TextException -> f (Either (f (CsvException a)) b)
nonUtf8 (TextException
_ :: Conduit.TextException) =
    Either (f (CsvException a)) b -> f (Either (f (CsvException a)) b)
forall a. a -> f a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either (f (CsvException a)) b
 -> f (Either (f (CsvException a)) b))
-> Either (f (CsvException a)) b
-> f (Either (f (CsvException a)) b)
forall a b. (a -> b) -> a -> b
$ f (CsvException a) -> Either (f (CsvException a)) b
forall a b. a -> Either a b
Left (f (CsvException a) -> Either (f (CsvException a)) b)
-> f (CsvException a) -> Either (f (CsvException a)) b
forall a b. (a -> b) -> a -> b
$ CsvException a -> f (CsvException a)
forall a. a -> f a
forall (f :: * -> *) a. Applicative f => a -> f a
pure CsvException a
forall a. CsvException a
CsvUnknownFileEncoding

-- | Stream in 'ByteString's and parse records in constant space
decodeCsv
  :: forall a m err
   . ( MonadThrow m
     , MonadValidate (Seq (CsvException err)) m
     , ValidateHeader a
     , FromNamedRecord a
     )
  => ConduitT ByteString a m ()
decodeCsv :: forall a (m :: * -> *) err.
(MonadThrow m, MonadValidate (Seq (CsvException err)) m,
 ValidateHeader a, FromNamedRecord a) =>
ConduitT ByteString a m ()
decodeCsv = (Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a) -> ConduitT ByteString a m ()
forall a (m :: * -> *) err.
(MonadThrow m, MonadValidate (Seq (CsvException err)) m) =>
(Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a) -> ConduitT ByteString a m ()
decodeCsvWithP (Proxy a -> Header -> Validate (NESeq String) ()
forall a (m :: * -> *) (proxy :: * -> *).
(ValidateHeader a, Bind m, Monad m) =>
proxy a -> Header -> ValidateT (NESeq String) m ()
forall (m :: * -> *) (proxy :: * -> *).
(Bind m, Monad m) =>
proxy a -> Header -> ValidateT (NESeq String) m ()
validateHeader (forall t. Proxy t
forall {k} (t :: k). Proxy t
Proxy @a)) NamedRecord -> Parser a
forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord

-- | Stream in 'ByteString's and parse records in constant space with a custom
-- record parser
decodeCsvWithP
  :: forall a m err
   . (MonadThrow m, MonadValidate (Seq (CsvException err)) m)
  => (Header -> Validate (NESeq String) ())
  -> (NamedRecord -> Parser a)
  -- ^ Custom record parser
  -> ConduitT ByteString a m ()
decodeCsvWithP :: forall a (m :: * -> *) err.
(MonadThrow m, MonadValidate (Seq (CsvException err)) m) =>
(Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a) -> ConduitT ByteString a m ()
decodeCsvWithP Header -> Validate (NESeq String) ()
headerValidator NamedRecord -> Parser a
p =
  ConduitT ByteString Text m ()
forall (m :: * -> *). MonadThrow m => ConduitT ByteString Text m ()
Conduit.detectUtf -- Strip any BOMs and decode UTF-*
    ConduitT ByteString Text m ()
-> ConduitT Text a m () -> ConduitT ByteString a m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| ConduitT Text ByteString m ()
forall (m :: * -> *) text binary.
(Monad m, Utf8 text binary) =>
ConduitT text binary m ()
Conduit.encodeUtf8 -- Cassava needs ByteString
    ConduitT Text ByteString m ()
-> ConduitT ByteString a m () -> ConduitT Text a m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| (Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a) -> ConduitT ByteString a m ()
forall a (m :: * -> *) err.
MonadValidate (Seq (CsvException err)) m =>
(Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a) -> ConduitT ByteString a m ()
parseCsv Header -> Validate (NESeq String) ()
headerValidator NamedRecord -> Parser a
p

parseCsv
  :: forall a m err
   . MonadValidate (Seq (CsvException err)) m
  => (Header -> Validate (NESeq String) ())
  -> (NamedRecord -> Parser a)
  -> ConduitT ByteString a m ()
parseCsv :: forall a (m :: * -> *) err.
MonadValidate (Seq (CsvException err)) m =>
(Header -> Validate (NESeq String) ())
-> (NamedRecord -> Parser a) -> ConduitT ByteString a m ()
parseCsv Header -> Validate (NESeq String) ()
headerValidator NamedRecord -> Parser a
p =
  (Header -> Validate (NESeq String) ())
-> HeaderParser (Parser a) -> ConduitT ByteString a m ()
forall a (m :: * -> *) err.
MonadValidate (Seq (CsvException err)) m =>
(Header -> Validate (NESeq String) ())
-> HeaderParser (Parser a) -> ConduitT ByteString a m ()
parseHeader
    Header -> Validate (NESeq String) ()
headerValidator
    ((NamedRecord -> Parser a)
-> DecodeOptions -> HeaderParser (Parser a)
forall a.
(NamedRecord -> Parser a)
-> DecodeOptions -> HeaderParser (Parser a)
CsvI.decodeByNameWithP ((NamedRecord -> Parser a) -> NamedRecord -> Parser a
forall a. (NamedRecord -> Parser a) -> NamedRecord -> Parser a
stripParser NamedRecord -> Parser a
p) DecodeOptions
defaultDecodeOptions)

data CsvException a
  = CsvMissingColumn !Text
  | CsvParseException !Int !Text
  | CsvFileNotFound
  | CsvUnknownFileEncoding
  | -- | A constructor for providing extensible csv exceptions
    CsvExceptionExtension a
  deriving stock (CsvException a -> CsvException a -> Bool
(CsvException a -> CsvException a -> Bool)
-> (CsvException a -> CsvException a -> Bool)
-> Eq (CsvException a)
forall a. Eq a => CsvException a -> CsvException a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: forall a. Eq a => CsvException a -> CsvException a -> Bool
== :: CsvException a -> CsvException a -> Bool
$c/= :: forall a. Eq a => CsvException a -> CsvException a -> Bool
/= :: CsvException a -> CsvException a -> Bool
Eq, Int -> CsvException a -> ShowS
[CsvException a] -> ShowS
CsvException a -> String
(Int -> CsvException a -> ShowS)
-> (CsvException a -> String)
-> ([CsvException a] -> ShowS)
-> Show (CsvException a)
forall a. Show a => Int -> CsvException a -> ShowS
forall a. Show a => [CsvException a] -> ShowS
forall a. Show a => CsvException a -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: forall a. Show a => Int -> CsvException a -> ShowS
showsPrec :: Int -> CsvException a -> ShowS
$cshow :: forall a. Show a => CsvException a -> String
show :: CsvException a -> String
$cshowList :: forall a. Show a => [CsvException a] -> ShowS
showList :: [CsvException a] -> ShowS
Show)

instance ToJSON a => ToJSON (CsvException a) where
  toJSON :: CsvException a -> Value
toJSON = ([Pair] -> Value) -> (a -> Value) -> CsvException a -> Value
forall kv r a.
KeyValue kv =>
([kv] -> r) -> (a -> r) -> CsvException a -> r
csvExceptionPairs [Pair] -> Value
object a -> Value
forall a. ToJSON a => a -> Value
toJSON
  toEncoding :: CsvException a -> Encoding
toEncoding = ([Series] -> Encoding)
-> (a -> Encoding) -> CsvException a -> Encoding
forall kv r a.
KeyValue kv =>
([kv] -> r) -> (a -> r) -> CsvException a -> r
csvExceptionPairs (Series -> Encoding
pairs (Series -> Encoding)
-> ([Series] -> Series) -> [Series] -> Encoding
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Series] -> Series
forall a. Monoid a => [a] -> a
mconcat) a -> Encoding
forall a. ToJSON a => a -> Encoding
toEncoding

csvExceptionPairs
#if MIN_VERSION_aeson(2,2,0)
  :: KeyValue e kv
#else
  :: KeyValue kv
#endif
  => ([kv] -> r)
  -> (a -> r)
  -> CsvException a
  -> r
csvExceptionPairs :: forall kv r a.
KeyValue kv =>
([kv] -> r) -> (a -> r) -> CsvException a -> r
csvExceptionPairs [kv] -> r
done a -> r
extend = \case
  CsvMissingColumn Text
column ->
    [kv] -> r
done
      [ Key
"message" Key -> Text -> kv
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> kv
.= (Text
"Missing column " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Text
forall a. Show a => a -> Text
tshow Text
column)
      , Key
"missingColumn" Key -> Text -> kv
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> kv
.= Text
column
      ]
  CsvParseException Int
rowNumber Text
message ->
    [kv] -> r
done [Key
"rowNumber" Key -> Int -> kv
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> kv
.= Int
rowNumber, Key
"message" Key -> Text -> kv
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> kv
.= Text
message]
  CsvException a
CsvFileNotFound -> [kv] -> r
done [Key
"message" Key -> Text -> kv
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> kv
.= (Text
"file not found" :: Text)]
  CsvException a
CsvUnknownFileEncoding ->
    [kv] -> r
done [Key
"message" Key -> Text -> kv
forall kv v. (KeyValue kv, ToJSON v) => Key -> v -> kv
forall v. ToJSON v => Key -> v -> kv
.= (Text
"file could not be decoded" :: Text)]
  CsvExceptionExtension a
a -> a -> r
extend a
a

parseHeader
  :: forall a m err
   . MonadValidate (Seq (CsvException err)) m
  => (Header -> Validate (NESeq String) ())
  -> CsvI.HeaderParser (CsvI.Parser a)
  -> ConduitT ByteString a m ()
parseHeader :: forall a (m :: * -> *) err.
MonadValidate (Seq (CsvException err)) m =>
(Header -> Validate (NESeq String) ())
-> HeaderParser (Parser a) -> ConduitT ByteString a m ()
parseHeader Header -> Validate (NESeq String) ()
headerValidator = \case
  CsvI.FailH ByteString
_ String
err ->
    m () -> ConduitT ByteString a m ()
forall (m :: * -> *) a. Monad m => m a -> ConduitT ByteString a m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT ByteString a m ())
-> m () -> ConduitT ByteString a m ()
forall a b. (a -> b) -> a -> b
$ Seq (CsvException err) -> m ()
forall a. Seq (CsvException err) -> m a
forall e (m :: * -> *) a. MonadValidate e m => e -> m a
refute (Seq (CsvException err) -> m ()) -> Seq (CsvException err) -> m ()
forall a b. (a -> b) -> a -> b
$ CsvException err -> Seq (CsvException err)
forall a. a -> Seq a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (CsvException err -> Seq (CsvException err))
-> CsvException err -> Seq (CsvException err)
forall a b. (a -> b) -> a -> b
$ Int -> Text -> CsvException err
forall a. Int -> Text -> CsvException a
CsvParseException Int
headerLineNumber (Text -> CsvException err) -> Text -> CsvException err
forall a b. (a -> b) -> a -> b
$ String -> Text
pack String
err
  CsvI.PartialH ByteString -> HeaderParser (Parser a)
k ->
    ConduitT ByteString a m (Maybe ByteString)
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await ConduitT ByteString a m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString a m ())
-> ConduitT ByteString a m ()
forall a b.
ConduitT ByteString a m a
-> (a -> ConduitT ByteString a m b) -> ConduitT ByteString a m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= ((Header -> Validate (NESeq String) ())
-> HeaderParser (Parser a) -> ConduitT ByteString a m ()
forall a (m :: * -> *) err.
MonadValidate (Seq (CsvException err)) m =>
(Header -> Validate (NESeq String) ())
-> HeaderParser (Parser a) -> ConduitT ByteString a m ()
parseHeader Header -> Validate (NESeq String) ()
headerValidator (HeaderParser (Parser a) -> ConduitT ByteString a m ())
-> (ByteString -> HeaderParser (Parser a))
-> ByteString
-> ConduitT ByteString a m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> HeaderParser (Parser a)
k) (ByteString -> ConduitT ByteString a m ())
-> (Maybe ByteString -> ByteString)
-> Maybe ByteString
-> ConduitT ByteString a m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe ByteString -> ByteString
forall a. a -> Maybe a -> a
fromMaybe ByteString
forall a. Monoid a => a
mempty
  CsvI.DoneH Header
header Parser a
parser ->
    case Validate (NESeq String) () -> Either (NESeq String) ()
forall e a. Validate e a -> Either e a
runValidate (Validate (NESeq String) () -> Either (NESeq String) ())
-> Validate (NESeq String) () -> Either (NESeq String) ()
forall a b. (a -> b) -> a -> b
$ Header -> Validate (NESeq String) ()
headerValidator (Header -> Validate (NESeq String) ())
-> Header -> Validate (NESeq String) ()
forall a b. (a -> b) -> a -> b
$ (ByteString -> ByteString) -> Header -> Header
forall a b. (a -> b) -> Vector a -> Vector b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> ByteString
stripUtf8 Header
header of
      Right {} -> Int -> Parser a -> ConduitT ByteString a m ()
forall err (m :: * -> *) a.
MonadValidate (Seq (CsvException err)) m =>
Int -> Parser a -> ConduitT ByteString a m ()
parseRow (Int -> Int
forall a. Enum a => a -> a
succ Int
headerLineNumber) Parser a
parser
      Left NESeq String
errs ->
        m () -> ConduitT ByteString a m ()
forall (m :: * -> *) a. Monad m => m a -> ConduitT ByteString a m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT ByteString a m ())
-> m () -> ConduitT ByteString a m ()
forall a b. (a -> b) -> a -> b
$ Seq (CsvException err) -> m ()
forall a. Seq (CsvException err) -> m a
forall e (m :: * -> *) a. MonadValidate e m => e -> m a
refute (Seq (CsvException err) -> m ()) -> Seq (CsvException err) -> m ()
forall a b. (a -> b) -> a -> b
$ [CsvException err] -> Seq (CsvException err)
forall a. [a] -> Seq a
Seq.fromList ([CsvException err] -> Seq (CsvException err))
-> [CsvException err] -> Seq (CsvException err)
forall a b. (a -> b) -> a -> b
$ Text -> CsvException err
forall a. Text -> CsvException a
CsvMissingColumn (Text -> CsvException err)
-> (String -> Text) -> String -> CsvException err
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
pack (String -> CsvException err) -> [String] -> [CsvException err]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> NESeq String -> [String]
forall a. NESeq a -> [a]
forall (t :: * -> *) a. Foldable t => t a -> [a]
toList NESeq String
errs

parseRow
  :: MonadValidate (Seq (CsvException err)) m
  => Int
  -> CsvI.Parser a
  -> ConduitT ByteString a m ()
parseRow :: forall err (m :: * -> *) a.
MonadValidate (Seq (CsvException err)) m =>
Int -> Parser a -> ConduitT ByteString a m ()
parseRow Int
rowNumber Parser a
parse = case Parser a
parse of
  CsvI.Fail ByteString
_ String
err ->
    m () -> ConduitT ByteString a m ()
forall (m :: * -> *) a. Monad m => m a -> ConduitT ByteString a m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT ByteString a m ())
-> m () -> ConduitT ByteString a m ()
forall a b. (a -> b) -> a -> b
$ Seq (CsvException err) -> m ()
forall e (m :: * -> *). MonadValidate e m => e -> m ()
dispute (Seq (CsvException err) -> m ()) -> Seq (CsvException err) -> m ()
forall a b. (a -> b) -> a -> b
$ CsvException err -> Seq (CsvException err)
forall a. a -> Seq a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (CsvException err -> Seq (CsvException err))
-> CsvException err -> Seq (CsvException err)
forall a b. (a -> b) -> a -> b
$ Int -> Text -> CsvException err
forall a. Int -> Text -> CsvException a
CsvParseException Int
rowNumber (Text -> CsvException err) -> Text -> CsvException err
forall a b. (a -> b) -> a -> b
$ String -> Text
pack String
err
  CsvI.Many [Either String a]
rows ByteString -> Parser a
k -> do
    !Int
newRowNumber <- [Either String a] -> ConduitT ByteString a m Int
handleRows [Either String a]
rows
    ConduitT ByteString a m (Maybe ByteString)
forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await ConduitT ByteString a m (Maybe ByteString)
-> (Maybe ByteString -> ConduitT ByteString a m ())
-> ConduitT ByteString a m ()
forall a b.
ConduitT ByteString a m a
-> (a -> ConduitT ByteString a m b) -> ConduitT ByteString a m b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Parser a -> ConduitT ByteString a m ()
forall err (m :: * -> *) a.
MonadValidate (Seq (CsvException err)) m =>
Int -> Parser a -> ConduitT ByteString a m ()
parseRow Int
newRowNumber (Parser a -> ConduitT ByteString a m ())
-> (Maybe ByteString -> Parser a)
-> Maybe ByteString
-> ConduitT ByteString a m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Parser a
k (ByteString -> Parser a)
-> (Maybe ByteString -> ByteString) -> Maybe ByteString -> Parser a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Maybe ByteString -> ByteString
forall a. a -> Maybe a -> a
fromMaybe ByteString
forall a. Monoid a => a
mempty
  CsvI.Done [Either String a]
rows -> ConduitT ByteString a m Int -> ConduitT ByteString a m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ConduitT ByteString a m Int -> ConduitT ByteString a m ())
-> ConduitT ByteString a m Int -> ConduitT ByteString a m ()
forall a b. (a -> b) -> a -> b
$ [Either String a] -> ConduitT ByteString a m Int
handleRows [Either String a]
rows
 where
  handleRows :: [Either String a] -> ConduitT ByteString a m Int
handleRows = (Int -> Either String a -> ConduitT ByteString a m Int)
-> Int -> [Either String a] -> ConduitT ByteString a m Int
forall (t :: * -> *) (m :: * -> *) b a.
(Foldable t, Monad m) =>
(b -> a -> m b) -> b -> t a -> m b
foldM Int -> Either String a -> ConduitT ByteString a m Int
forall err (m :: * -> *) a i.
MonadValidate (Seq (CsvException err)) m =>
Int -> Either String a -> ConduitT i a m Int
handleRow Int
rowNumber

handleRow
  :: MonadValidate (Seq (CsvException err)) m
  => Int
  -> Either String a
  -> ConduitT i a m Int
handleRow :: forall err (m :: * -> *) a i.
MonadValidate (Seq (CsvException err)) m =>
Int -> Either String a -> ConduitT i a m Int
handleRow Int
rowNumber Either String a
result = do
  (String -> ConduitT i a m ())
-> (a -> ConduitT i a m ()) -> Either String a -> ConduitT i a m ()
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either
    (m () -> ConduitT i a m ()
forall (m :: * -> *) a. Monad m => m a -> ConduitT i a m a
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT i a m ())
-> (String -> m ()) -> String -> ConduitT i a m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Seq (CsvException err) -> m ()
forall e (m :: * -> *). MonadValidate e m => e -> m ()
dispute (Seq (CsvException err) -> m ())
-> (String -> Seq (CsvException err)) -> String -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CsvException err -> Seq (CsvException err)
forall a. a -> Seq a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (CsvException err -> Seq (CsvException err))
-> (String -> CsvException err) -> String -> Seq (CsvException err)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> CsvException err
forall a. Int -> Text -> CsvException a
CsvParseException Int
rowNumber (Text -> CsvException err)
-> (String -> Text) -> String -> CsvException err
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
pack)
    a -> ConduitT i a m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield
    Either String a
result
  Int -> ConduitT i a m Int
forall a. a -> ConduitT i a m a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Int -> ConduitT i a m Int) -> Int -> ConduitT i a m Int
forall a b. (a -> b) -> a -> b
$ Int
rowNumber Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1

-- | Strip leading/trailing whitespace from each key-value pairs
stripNamedRecord :: NamedRecord -> NamedRecord
stripNamedRecord :: NamedRecord -> NamedRecord
stripNamedRecord =
  [(ByteString, ByteString)] -> NamedRecord
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
HashMap.fromList ([(ByteString, ByteString)] -> NamedRecord)
-> (NamedRecord -> [(ByteString, ByteString)])
-> NamedRecord
-> NamedRecord
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((ByteString, ByteString) -> (ByteString, ByteString))
-> [(ByteString, ByteString)] -> [(ByteString, ByteString)]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((ByteString -> ByteString)
-> (ByteString -> ByteString)
-> (ByteString, ByteString)
-> (ByteString, ByteString)
forall a b c d. (a -> b) -> (c -> d) -> (a, c) -> (b, d)
forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap ByteString -> ByteString
stripUtf8 ByteString -> ByteString
stripUtf8) ([(ByteString, ByteString)] -> [(ByteString, ByteString)])
-> (NamedRecord -> [(ByteString, ByteString)])
-> NamedRecord
-> [(ByteString, ByteString)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NamedRecord -> [(ByteString, ByteString)]
forall k v. HashMap k v -> [(k, v)]
HashMap.toList

-- | Strip leading/trailing whitespace from @'ByteString'@ via UTF-8
stripUtf8 :: ByteString -> ByteString
stripUtf8 :: ByteString -> ByteString
stripUtf8 = Text -> ByteString
T.encodeUtf8 (Text -> ByteString)
-> (ByteString -> Text) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.strip (Text -> Text) -> (ByteString -> Text) -> ByteString -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Text
T.decodeUtf8

-- | Take a custom parser and return one that strips prior to parsing
stripParser :: (NamedRecord -> Parser a) -> (NamedRecord -> Parser a)
stripParser :: forall a. (NamedRecord -> Parser a) -> NamedRecord -> Parser a
stripParser NamedRecord -> Parser a
p = NamedRecord -> Parser a
p (NamedRecord -> Parser a)
-> (NamedRecord -> NamedRecord) -> NamedRecord -> Parser a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. NamedRecord -> NamedRecord
stripNamedRecord