{- |
   Module      : Text.Pandoc.CSV
   Copyright   : Copyright (C) 2017-2023 John MacFarlane <jgm@berkeley.edu>
   License     : GNU GPL, version 2 or above
   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Simple CSV parser.
-}

module Text.Pandoc.CSV (
  CSVOptions(..),
  defaultCSVOptions,
  parseCSV,
  ParseError
) where

import Control.Monad (unless, void, mzero)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Parsing hiding (escaped)

type Parser = Parsec Text ()

data CSVOptions = CSVOptions{
    CSVOptions -> Char
csvDelim     :: Char
  , CSVOptions -> Maybe Char
csvQuote     :: Maybe Char
  , CSVOptions -> Bool
csvKeepSpace :: Bool -- treat whitespace following delim as significant
  , CSVOptions -> Maybe Char
csvEscape    :: Maybe Char -- default is to double up quote
} deriving (ReadPrec [CSVOptions]
ReadPrec CSVOptions
Int -> ReadS CSVOptions
ReadS [CSVOptions]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [CSVOptions]
$creadListPrec :: ReadPrec [CSVOptions]
readPrec :: ReadPrec CSVOptions
$creadPrec :: ReadPrec CSVOptions
readList :: ReadS [CSVOptions]
$creadList :: ReadS [CSVOptions]
readsPrec :: Int -> ReadS CSVOptions
$creadsPrec :: Int -> ReadS CSVOptions
Read, Int -> CSVOptions -> ShowS
[CSVOptions] -> ShowS
CSVOptions -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CSVOptions] -> ShowS
$cshowList :: [CSVOptions] -> ShowS
show :: CSVOptions -> String
$cshow :: CSVOptions -> String
showsPrec :: Int -> CSVOptions -> ShowS
$cshowsPrec :: Int -> CSVOptions -> ShowS
Show)

defaultCSVOptions :: CSVOptions
defaultCSVOptions :: CSVOptions
defaultCSVOptions = CSVOptions{
    csvDelim :: Char
csvDelim = Char
','
  , csvQuote :: Maybe Char
csvQuote = forall a. a -> Maybe a
Just Char
'"'
  , csvKeepSpace :: Bool
csvKeepSpace = Bool
False
  , csvEscape :: Maybe Char
csvEscape = forall a. Maybe a
Nothing }

parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV CSVOptions
opts Text
t = forall s t a.
Stream s Identity t =>
Parsec s () a -> String -> s -> Either ParseError a
parse (CSVOptions -> Parser [[Text]]
pCSV CSVOptions
opts) String
"csv" Text
t

pCSV :: CSVOptions -> Parser [[Text]]
pCSV :: CSVOptions -> Parser [[Text]]
pCSV CSVOptions
opts =
  (CSVOptions -> Parser [Text]
pCSVRow CSVOptions
opts forall s (m :: * -> *) t u a sep.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m sep -> ParsecT s u m [a]
`sepEndBy` Parser ()
endline) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m ()
spaces forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof)

pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow CSVOptions
opts = do
  Text
x <- CSVOptions -> Parser Text
pCSVCell CSVOptions
opts
  [Text]
xs <- (if Text -> Bool
T.null Text
x then forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 else forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many) forall a b. (a -> b) -> a -> b
$ CSVOptions -> Parser ()
pCSVDelim CSVOptions
opts forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> CSVOptions -> Parser Text
pCSVCell CSVOptions
opts
  forall (m :: * -> *) a. Monad m => a -> m a
return (Text
xforall a. a -> [a] -> [a]
:[Text]
xs)

pCSVCell :: CSVOptions -> Parser Text
pCSVCell :: CSVOptions -> Parser Text
pCSVCell CSVOptions
opts = CSVOptions -> Parser Text
pCSVQuotedCell CSVOptions
opts forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> CSVOptions -> Parser Text
pCSVUnquotedCell CSVOptions
opts

pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell CSVOptions
opts =
  case CSVOptions -> Maybe Char
csvQuote CSVOptions
opts of
    Maybe Char
Nothing -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
    Just Char
quotechar -> do
      forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
quotechar
      String
res <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= Char
quotechar Bool -> Bool -> Bool
&&
                                  forall a. a -> Maybe a
Just Char
c forall a. Eq a => a -> a -> Bool
/= CSVOptions -> Maybe Char
csvEscape CSVOptions
opts) forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> CSVOptions -> ParsecT Text () Identity Char
escaped CSVOptions
opts)
      forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
quotechar
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
res

escaped :: CSVOptions -> Parser Char
escaped :: CSVOptions -> ParsecT Text () Identity Char
escaped CSVOptions
opts =
  case CSVOptions -> Maybe Char
csvEscape CSVOptions
opts of
    Maybe Char
Nothing ->
      case CSVOptions -> Maybe Char
csvQuote CSVOptions
opts of
        Maybe Char
Nothing -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
        Just Char
q -> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
q forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
q
    Just Char
c  -> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
c forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
noneOf String
"\r\n"

pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell CSVOptions
opts = String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
/= CSVOptions -> Char
csvDelim CSVOptions
opts Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\r' Bool -> Bool -> Bool
&& Char
c forall a. Eq a => a -> a -> Bool
/= Char
'\n'))

pCSVDelim :: CSVOptions -> Parser ()
pCSVDelim :: CSVOptions -> Parser ()
pCSVDelim CSVOptions
opts = do
  forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char (CSVOptions -> Char
csvDelim CSVOptions
opts)
  let sp :: ParsecT Text u Identity Char
sp = case CSVOptions -> Char
csvDelim CSVOptions
opts of
              Char
'\t' -> forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
' '
              Char
_    -> forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
String -> ParsecT s u m Char
oneOf String
" \t"
  forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (CSVOptions -> Bool
csvKeepSpace CSVOptions
opts) forall a b. (a -> b) -> a -> b
$ forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall {u}. ParsecT Text u Identity Char
sp

endline :: Parser ()
endline :: Parser ()
endline = do
  forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional (forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\r')
  forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\n'