{-# LANGUAGE BangPatterns, CPP, DeriveFunctor #-}

-- | This module allows for streaming decoding of CSV data. This is
-- useful if you need to parse large amounts of input in constant
-- space. The API also allows you to ignore type conversion errors on
-- a per-record basis.
module Data.Csv.Streaming
    (
    -- * Usage example
    -- $example

    -- * Stream representation
    -- $stream-representation
      Records(..)

    -- * Decoding records
    -- $typeconversion

    -- ** Index-based record conversion
    -- $indexbased
    , HasHeader(..)
    , decode
    , decodeWith

    -- ** Name-based record conversion
    -- $namebased
    , decodeByName
    , decodeByNameWith
    ) where

import Control.DeepSeq (NFData(rnf))
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import qualified Data.ByteString.Lazy.Char8 as BL8
import Data.Foldable (Foldable(..))
import Prelude hiding (foldr)

import Data.Csv.Conversion
import Data.Csv.Incremental hiding (decode, decodeByName, decodeByNameWith,
                                    decodeWith)
import qualified Data.Csv.Incremental as I
import Data.Csv.Parser
import Data.Csv.Types

#if !MIN_VERSION_base(4,8,0)
import Control.Applicative ((<$>), (<*>), pure)
import Data.Traversable (Traversable(..))
#endif

#if !MIN_VERSION_bytestring(0,10,0)
import qualified Data.ByteString.Lazy.Internal as BL  -- for constructors
#endif

-- $example
--
-- A short usage example:
--
-- > for_ (decode NoHeader "John,27\r\nJane,28\r\n") $ \ (name, age :: Int) ->
-- >     putStrLn $ name ++ " is " ++ show age ++ " years old"
--
-- N.B. The 'Foldable' instance, which is used above, skips records
-- that failed to convert. If you don't want this behavior, work
-- directly with the 'Cons' and 'Nil' constructors.

-- $stream-representation
--
-- A stream of records is represented as a (lazy) list that may
-- contain errors.

-- $typeconversion
--
-- Just like in the case of non-streaming decoding, there are two ways
-- to convert CSV records to and from user-defined data types:
-- index-based conversion and name-based conversion.

-- $indexbased
--
-- See documentation on index-based conversion in "Data.Csv" for more
-- information.

-- $namebased
--
-- See documentation on name-based conversion in "Data.Csv" for more
-- information.

-- | A stream of parsed records. If type conversion failed for the
-- record, the error is returned as @'Left' errMsg@.
--
-- The stream is a lazy, list-like structure: every 'Cons' cell holds
-- one conversion result and the rest of the stream, and the stream is
-- always terminated by a 'Nil'.
data Records a
    = -- | A record or an error message, followed by more records.
      Cons (Either String a) (Records a)

      -- | End of stream, potentially due to a parse error. If a parse
      -- error occurred, the first field contains the error message.
      -- The second field contains any unconsumed input.
    | Nil (Maybe String) BL.ByteString
    deriving (Eq, Functor, Show)

-- | Skips records that failed to convert.
--
-- Only the @'Right'@ payloads of the 'Cons' cells take part in the
-- fold; a parse error stored in the terminating 'Nil' is ignored as
-- well. Work with the constructors directly if errors matter.
instance Foldable Records where
    foldr = foldrRecords
#if MIN_VERSION_base(4,6,0)
    -- Strict left fold; only overridden where the class exposes the
    -- method (guarded on the base version).
    foldl' = foldlRecords'
#endif

-- | Lazy right fold over the successfully converted records of a
-- stream.
--
-- Cells holding a conversion error (@'Left' _@) are skipped. The fold
-- ends at the terminating 'Nil', whose contents are not inspected.
foldrRecords :: (a -> b -> b) -> b -> Records a -> b
foldrRecords f = go
  where
    -- Combine a converted record with the fold of the remaining stream.
    go z (Cons (Right x) rs) = f x (go z rs)
    -- Drop a record that failed to convert.
    go z (Cons (Left _) rs)  = go z rs
    -- Only 'Nil' is left: end of stream.
    go z _                   = z
{-# INLINE foldrRecords #-}

#if MIN_VERSION_base(4,6,0)
-- | Strict left fold over the successfully converted records of a
-- stream.
--
-- The accumulator is forced to weak head normal form after every
-- step. Cells holding a conversion error (@'Left' _@) are skipped and
-- the fold ends at the terminating 'Nil'.
foldlRecords' :: (a -> b -> a) -> a -> Records b -> a
foldlRecords' f = go
  where
    -- Force the new accumulator before recursing so that no chain of
    -- thunks builds up over a long stream.
    go z (Cons (Right x) rs) = let z' = f z x in z' `seq` go z' rs
    -- Drop a record that failed to convert.
    go z (Cons (Left _) rs)  = go z rs
    -- Only 'Nil' is left: end of stream.
    go z _                   = z
{-# INLINE foldlRecords' #-}
#endif

-- | Applies the effectful function to every successfully converted
-- record, in stream order. Conversion errors and the terminating
-- 'Nil' (including its error message and unconsumed input) are kept
-- as they are.
instance Traversable Records where
    traverse _ (Nil merr rest) = pure $ Nil merr rest
    traverse f (Cons x xs)     = Cons <$> traverseElem x <*> traverse f xs
      where
        -- A conversion error carries no value to act on; re-wrap it.
        traverseElem (Left err) = pure $ Left err
        traverseElem (Right y)  = Right <$> f y

-- | Fully evaluates the stream: every record (or conversion error),
-- and the error message and leftover input held by the terminating
-- 'Nil'.
instance NFData a => NFData (Records a) where
    rnf (Cons r rs) = rnf r `seq` rnf rs
#if MIN_VERSION_bytestring(0,10,0)
    rnf (Nil errMsg rest) = rnf errMsg `seq` rnf rest
#else
    -- This branch does not use 'rnf' on the lazy 'BL.ByteString';
    -- the chunk list is walked by hand instead.
    rnf (Nil errMsg rest) = rnf errMsg `seq` rnfLazyByteString rest

-- | Walks every chunk of a lazy 'BL.ByteString', forcing the spine of
-- the chunk list.
rnfLazyByteString :: BL.ByteString -> ()
rnfLazyByteString BL.Empty       = ()
rnfLazyByteString (BL.Chunk _ b) = rnfLazyByteString b
#endif

-- | Efficiently deserialize CSV records in a streaming fashion.
-- Equivalent to @'decodeWith' 'defaultDecodeOptions'@.
decode :: FromRecord a
       => HasHeader      -- ^ Data contains header that should be
                         -- skipped
       -> BL.ByteString  -- ^ CSV data
       -> Records a
decode = decodeWith defaultDecodeOptions

-- | Like 'decode', but lets you customize how the CSV data is parsed.
--
-- The lazy input is split into its strict chunks, which are fed to
-- the incremental parser from "Data.Csv.Incremental" one at a time.
-- The records produced after each chunk are consed onto the result
-- lazily. On a parse error the stream ends with @'Nil' ('Just' err)@
-- holding the input that was not consumed.
decodeWith :: FromRecord a
           => DecodeOptions  -- ^ Decoding options
           -> HasHeader      -- ^ Data contains header that should be
                             -- skipped
           -> BL.ByteString  -- ^ CSV data
           -> Records a
decodeWith !opts hasHeader s0 =
    go (BL.toChunks s0) (I.decodeWith opts hasHeader)
  where
    -- Parser finished: emit the final records; chunks never handed to
    -- the parser become the leftover input.
    go ss (Done xs)       = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
    -- Parse error: the leftover input is what the parser returned,
    -- followed by the chunks that were never fed.
    go ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
    -- Out of chunks: feed an empty chunk to tell the parser that no
    -- more input is available.
    go [] (Many xs k)     = foldr Cons (go [] (k B.empty)) xs
    -- Emit what was parsed so far and continue with the next chunk.
    go (s:ss) (Many xs k) = foldr Cons (go ss (k s)) xs

-- | Efficiently deserialize CSV in a streaming fashion. The data is
-- assumed to be preceded by a header. Returns @'Left' errMsg@ if
-- parsing the header fails. Equivalent to @'decodeByNameWith'
-- 'defaultDecodeOptions'@.
decodeByName :: FromNamedRecord a
             => BL.ByteString  -- ^ CSV data
             -> Either String (Header, Records a)
decodeByName = decodeByNameWith defaultDecodeOptions

-- TODO: Include something more in error messages?

-- | Like 'decodeByName', but lets you customize how the CSV data is
-- parsed.
--
-- Decoding runs in two phases. First the header is parsed; a failure
-- there yields @'Left' errMsg@, where the message is the parser's
-- error followed by @\" at \"@ and the remaining input. Once the
-- header is available the result is @'Right' (header, records)@ and
-- the records are produced lazily from the rest of the input; a parse
-- error at that stage ends the stream with @'Nil' ('Just' err)@.
decodeByNameWith :: FromNamedRecord a
                 => DecodeOptions  -- ^ Decoding options
                 -> BL.ByteString  -- ^ CSV data
                 -> Either String (Header, Records a)
decodeByNameWith !opts s0 = go (BL.toChunks s0) (I.decodeByNameWith opts)
  where
    -- Phase 1: feed chunks until the header has been parsed.
    go ss (DoneH hdr p)    = Right (hdr, go2 ss p)
    go ss (FailH rest err) = Left $ err ++ " at " ++
                             show (BL8.unpack . BL.fromChunks $ rest : ss)
    -- Out of chunks: feed an empty chunk to tell the parser that no
    -- more input is available.
    go [] (PartialH k)     = go [] (k B.empty)
    go (s:ss) (PartialH k) = go ss (k s)

    -- Phase 2: stream the records, feeding the remaining chunks to
    -- the record parser one at a time.
    go2 ss (Done xs)       = foldr Cons (Nil Nothing (BL.fromChunks ss)) xs
    -- Leftover input is what the parser returned plus unfed chunks.
    go2 ss (Fail rest err) = Nil (Just err) (BL.fromChunks (rest:ss))
    go2 [] (Many xs k)     = foldr Cons (go2 [] (k B.empty)) xs
    go2 (s:ss) (Many xs k) = foldr Cons (go2 ss (k s)) xs