-- | CSV parser as specified in RFC4180
--

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications #-}

module Foundation.Format.CSV.Parser
    ( file
    , recordC
    , record
    , record_
    , field
    ) where

import Basement.Imports hiding (throw)
import Foundation.Format.CSV.Types
import Basement.String (snoc)
import Foundation.Parser
import Foundation.Monad
import Foundation.Collection (Collection (elem))
import Foundation.Conduit
import Control.Monad (void)
import Data.Typeable (typeRep)
import Data.Proxy (Proxy(..))

recordC :: (Monad m, MonadThrow m) => Conduit String Row m ()
recordC :: Conduit String Row m ()
recordC = (String -> Conduit String Row m ()) -> Conduit String Row m ()
forall input output (monad :: * -> *) b.
(input -> Conduit input output monad b)
-> Conduit input output monad ()
awaitForever ((String -> Conduit String Row m ()) -> Conduit String Row m ())
-> (String -> Conduit String Row m ()) -> Conduit String Row m ()
forall a b. (a -> b) -> a -> b
$ Result String Row -> Conduit String Row m ()
forall (m :: * -> *) input o.
(MonadThrow m, Typeable input, Show input) =>
Result input o -> Conduit (Chunk input) o m ()
recordC' (Result String Row -> Conduit String Row m ())
-> (String -> Result String Row)
-> String
-> Conduit String Row m ()
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Parser String Row -> String -> Result String Row
forall input a.
ParserSource input =>
Parser input a -> input -> Result input a
parse (Parser String Row
record Parser String Row -> Parser String (Maybe ()) -> Parser String Row
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser String () -> Parser String (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Chunk String -> Parser String ()
forall input.
(ParserSource input, Sequential (Chunk input),
 Element (Chunk input) ~ Element input, Eq (Chunk input)) =>
Chunk input -> Parser input ()
elements String
Chunk String
crlf))
  where
    recordC' :: Result input o -> Conduit (Chunk input) o m ()
recordC' (ParseFailed ParseError input
err) = ParseError input -> Conduit (Chunk input) o m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throw ParseError input
err
    recordC' (ParseOk Chunk input
rest o
v)  = Chunk input -> Conduit (Chunk input) o m ()
forall i o (m :: * -> *). i -> Conduit i o m ()
leftover Chunk input
rest Conduit (Chunk input) o m ()
-> Conduit (Chunk input) o m () -> Conduit (Chunk input) o m ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> o -> Conduit (Chunk input) o m ()
forall (m :: * -> *) o i. Monad m => o -> Conduit i o m ()
yield o
v
    recordC' (ParseMore Chunk input -> Result input o
more)  = do
        Maybe (Chunk input)
mm <- Conduit (Chunk input) o m (Maybe (Chunk input))
forall i o (m :: * -> *). Conduit i o m (Maybe i)
await
        case Maybe (Chunk input)
mm of
            Maybe (Chunk input)
Nothing -> ParseError String -> Conduit (Chunk input) o m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throw (ParseError String
forall input. ParseError input
NotEnoughParseOnly :: ParseError String)
            Just Chunk input
b  -> Result input o -> Conduit (Chunk input) o m ()
recordC' (Chunk input -> Result input o
more Chunk input
b)

record_ :: forall row . (Typeable row, Record row) => Parser String row
record_ :: Parser String row
record_ = do
    Row
rs <- Parser String Row
record
    case Row -> Either String row
forall a. Record a => Row -> Either String a
fromRow Row
rs of
        Left String
err -> ParseError String -> Parser String row
forall input a. ParseError input -> Parser input a
reportError (ParseError String -> Parser String row)
-> ParseError String -> Parser String row
forall a b. (a -> b) -> a -> b
$ Chunk String -> Chunk String -> ParseError String
forall input. Chunk input -> Chunk input -> ParseError input
Expected (TypeRep -> String
forall a. Show a => a -> String
show (TypeRep -> String) -> TypeRep -> String
forall a b. (a -> b) -> a -> b
$ Proxy row -> TypeRep
forall k (proxy :: k -> *) (a :: k).
Typeable a =>
proxy a -> TypeRep
typeRep (Proxy row
forall k (t :: k). Proxy t
Proxy @row)) String
Chunk String
err
        Right row
v  -> row -> Parser String row
forall (f :: * -> *) a. Applicative f => a -> f a
pure row
v

file :: Parser String CSV
file :: Parser String CSV
file = do
    Maybe Row
mh <- Parser String Row -> Parser String (Maybe Row)
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Parser String Row -> Parser String (Maybe Row))
-> Parser String Row -> Parser String (Maybe Row)
forall a b. (a -> b) -> a -> b
$ Parser String Row
header Parser String Row -> Parser String () -> Parser String Row
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Chunk String -> Parser String ()
forall input.
(ParserSource input, Sequential (Chunk input),
 Element (Chunk input) ~ Element input, Eq (Chunk input)) =>
Chunk input -> Parser input ()
elements String
Chunk String
crlf
    Row
x <- Parser String Row
record
    [Row]
xs <- Parser String Row -> Parser String [Row]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (Parser String Row -> Parser String [Row])
-> Parser String Row -> Parser String [Row]
forall a b. (a -> b) -> a -> b
$ Chunk String -> Parser String ()
forall input.
(ParserSource input, Sequential (Chunk input),
 Element (Chunk input) ~ Element input, Eq (Chunk input)) =>
Chunk input -> Parser input ()
elements String
Chunk String
crlf Parser String () -> Parser String Row -> Parser String Row
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser String Row
record
    Parser String (Maybe ()) -> Parser String ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser String (Maybe ()) -> Parser String ())
-> Parser String (Maybe ()) -> Parser String ()
forall a b. (a -> b) -> a -> b
$ Parser String () -> Parser String (Maybe ())
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional (Parser String () -> Parser String (Maybe ()))
-> Parser String () -> Parser String (Maybe ())
forall a b. (a -> b) -> a -> b
$ Chunk String -> Parser String ()
forall input.
(ParserSource input, Sequential (Chunk input),
 Element (Chunk input) ~ Element input, Eq (Chunk input)) =>
Chunk input -> Parser input ()
elements String
Chunk String
crlf
    CSV -> Parser String CSV
forall (f :: * -> *) a. Applicative f => a -> f a
pure (CSV -> Parser String CSV) -> CSV -> Parser String CSV
forall a b. (a -> b) -> a -> b
$ [Item CSV] -> CSV
forall l. IsList l => [Item l] -> l
fromList ([Item CSV] -> CSV) -> [Item CSV] -> CSV
forall a b. (a -> b) -> a -> b
$ case Maybe Row
mh of
        Maybe Row
Nothing -> Row
x Row -> [Row] -> [Row]
forall a. a -> [a] -> [a]
: [Row]
xs
        Just Row
h  -> Row
h Row -> [Row] -> [Row]
forall a. a -> [a] -> [a]
: Row
x Row -> [Row] -> [Row]
forall a. a -> [a] -> [a]
: [Row]
xs

header :: Parser String Row
header :: Parser String Row
header = do
    Field
x <- Parser String Field
name
    [Field]
xs <- Parser String Field -> Parser String [Field]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (Parser String Field -> Parser String [Field])
-> Parser String Field -> Parser String [Field]
forall a b. (a -> b) -> a -> b
$ Element String -> Parser String ()
forall input.
(ParserSource input, Eq (Element input),
 Element input ~ Element (Chunk input)) =>
Element input -> Parser input ()
element Char
Element String
comma Parser String () -> Parser String Field -> Parser String Field
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser String Field
name
    Row -> Parser String Row
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Row -> Parser String Row) -> Row -> Parser String Row
forall a b. (a -> b) -> a -> b
$ [Item Row] -> Row
forall l. IsList l => [Item l] -> l
fromList ([Item Row] -> Row) -> [Item Row] -> Row
forall a b. (a -> b) -> a -> b
$ Field
x Field -> [Field] -> [Field]
forall a. a -> [a] -> [a]
: [Field]
xs

record :: Parser String Row
record :: Parser String Row
record = do
    Field
x <- Parser String Field
field
    [Field]
xs <- Parser String Field -> Parser String [Field]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some (Parser String Field -> Parser String [Field])
-> Parser String Field -> Parser String [Field]
forall a b. (a -> b) -> a -> b
$ Element String -> Parser String ()
forall input.
(ParserSource input, Eq (Element input),
 Element input ~ Element (Chunk input)) =>
Element input -> Parser input ()
element Char
Element String
comma Parser String () -> Parser String Field -> Parser String Field
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser String Field
field
    Row -> Parser String Row
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Row -> Parser String Row) -> Row -> Parser String Row
forall a b. (a -> b) -> a -> b
$ [Item Row] -> Row
forall l. IsList l => [Item l] -> l
fromList ([Item Row] -> Row) -> [Item Row] -> Row
forall a b. (a -> b) -> a -> b
$ Field
x Field -> [Field] -> [Field]
forall a. a -> [a] -> [a]
: [Field]
xs

name :: Parser String Field
name :: Parser String Field
name = Parser String Field
field
{-# INLINE name #-}

field :: Parser String Field
field :: Parser String Field
field = Parser String Field
escaped Parser String Field -> Parser String Field -> Parser String Field
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser String Field
nonEscaped

escaped :: Parser String Field
escaped :: Parser String Field
escaped = Element String -> Parser String ()
forall input.
(ParserSource input, Eq (Element input),
 Element input ~ Element (Chunk input)) =>
Element input -> Parser input ()
element Char
Element String
dquote Parser String () -> Parser String Field -> Parser String Field
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser String Field
escaped'
  where
    escaped' :: Parser String Field
escaped' = do
        String
x <- (Element String -> Bool) -> Parser String (Chunk String)
forall input.
(ParserSource input, Sequential (Chunk input)) =>
(Element input -> Bool) -> Parser input (Chunk input)
takeWhile (Char
dquote Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=)
        Element String -> Parser String ()
forall input.
(ParserSource input, Eq (Element input),
 Element input ~ Element (Chunk input)) =>
Element input -> Parser input ()
element Char
Element String
dquote
        Maybe Char
p <- Parser String (Maybe Char)
forall input.
ParserSource input =>
Parser input (Maybe (Element input))
peek
        if Maybe Char
p Maybe Char -> Maybe Char -> Bool
forall a. Eq a => a -> a -> Bool
== (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
dquote)
            then CountOf (Element String) -> Parser String ()
forall input.
ParserSource input =>
CountOf (Element input) -> Parser input ()
skip CountOf (Element String)
1 Parser String () -> Parser String Field -> Parser String Field
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> Parser String Field
forall input.
(ParserSource input, Eq (Element input), Sequential (Chunk input),
 Chunk input ~ String, Element input ~ Element (Chunk input),
 Element input ~ Char) =>
String -> Parser input Field
descaped' (String -> Char -> String
snoc String
x Char
dquote)
            else Field -> Parser String Field
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String -> Escaping -> Field
FieldString String
x Escaping
Escape)
    descaped' :: String -> Parser input Field
descaped' String
acc = do
        String
x <- (Element input -> Bool) -> Parser input (Chunk input)
forall input.
(ParserSource input, Sequential (Chunk input)) =>
(Element input -> Bool) -> Parser input (Chunk input)
takeWhile (Char
dquote Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/=)
        Element input -> Parser input ()
forall input.
(ParserSource input, Eq (Element input),
 Element input ~ Element (Chunk input)) =>
Element input -> Parser input ()
element Char
Element input
dquote
        Maybe Char
p <- Parser input (Maybe Char)
forall input.
ParserSource input =>
Parser input (Maybe (Element input))
peek
        if Maybe Char
p Maybe Char -> Maybe Char -> Bool
forall a. Eq a => a -> a -> Bool
== (Char -> Maybe Char
forall a. a -> Maybe a
Just Char
dquote)
            then CountOf (Element input) -> Parser input ()
forall input.
ParserSource input =>
CountOf (Element input) -> Parser input ()
skip CountOf (Element input)
1 Parser input () -> Parser input Field -> Parser input Field
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> Parser input Field
descaped' (String
acc String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String -> Char -> String
snoc String
x Char
dquote)
            else Field -> Parser input Field
forall (f :: * -> *) a. Applicative f => a -> f a
pure (String -> Escaping -> Field
FieldString (String
acc String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
x) Escaping
DoubleEscape)

nonEscaped :: Parser String Field
nonEscaped :: Parser String Field
nonEscaped = (String -> Escaping -> Field) -> Escaping -> String -> Field
forall a b c. (a -> b -> c) -> b -> a -> c
flip String -> Escaping -> Field
FieldString Escaping
NoEscape (String -> Field) -> Parser String String -> Parser String Field
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Element String -> Bool) -> Parser String (Chunk String)
forall input.
(ParserSource input, Sequential (Chunk input)) =>
(Element input -> Bool) -> Parser input (Chunk input)
takeWhile (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Char -> String -> Bool) -> String -> Char -> Bool
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> String -> Bool
forall c a.
(Collection c, Eq a, a ~ Element c) =>
Element c -> c -> Bool
elem String
specials)
{-# INLINE nonEscaped #-}

comma :: Char
comma :: Char
comma = Char
','
{-# INLINE comma #-}

cr :: Char
cr :: Char
cr = Char
'\r'
{-# INLINE cr #-}

dquote :: Char
dquote :: Char
dquote = Char
'"'
{-# INLINE dquote #-}

lf :: Char
lf :: Char
lf = Char
'\n'
{-# INLINE lf #-}

crlf :: String
crlf :: String
crlf = [Item String] -> String
forall l. IsList l => [Item l] -> l
fromList [Char
Item String
cr, Char
Item String
lf]
{-# NOINLINE crlf #-}

{-
textdataQuoted :: String
textdataQuoted = textdata <> specials
{-# NOINLINE textdataQuoted #-}
-}

specials :: String
specials :: String
specials = String
",\r\n"
{-# INLINE specials #-}

{-
textdata :: String
textdata = fromList $ [' '..'!'] <> ['#'..'+'] <> ['-'..'~']
{-# NOINLINE textdata #-}
-}