module Siphon.Internal.Text where
import Siphon.Types
import Control.Applicative (optional)
import Data.Attoparsec.Text (char, endOfInput, string)
import qualified Data.Attoparsec.Text as A
import qualified Data.Attoparsec.Text.Lazy as AL
import qualified Data.Attoparsec.Zepto as Z
import qualified Data.Text as T
import qualified Data.Text as Text
import qualified Data.Vector as V
import qualified Data.Text.Lazy as LText
import qualified Data.Text.Lazy.Builder as Builder
import Data.Text.Lazy.Builder (Builder)
import Data.Word (Word8)
import Data.Vector (Vector)
import Data.Text (Text)
import Data.Coerce (coerce)
import Siphon.Types
import Control.Applicative
import Data.Monoid
text :: Siphon Text
text = Siphon
escape
encodeRow
(A.parse (row comma))
Text.null
encodeRow :: Vector (Escaped Text) -> Text
encodeRow = id
. flip Text.append (Text.singleton newline)
. Text.intercalate (Text.singleton comma)
. V.toList
. coerce
escape :: Text -> Escaped Text
escape t = case Text.find (\c -> c == newline || c == cr || c == comma || c == doubleQuote) t of
Nothing -> Escaped t
Just _ -> escapeAlways t
escapeAlways :: Text -> Escaped Text
escapeAlways t = Escaped $ Text.concat
[ textDoubleQuote
, Text.replace textDoubleQuote (Text.pack [doubleQuote,doubleQuote]) t
, textDoubleQuote
]
sepByDelim1' :: A.Parser a
-> Char
-> A.Parser [a]
sepByDelim1' p !delim = liftM2' (:) p loop
where
loop = do
mb <- A.peekChar
case mb of
Just b | b == delim -> liftM2' (:) (A.anyChar *> p) loop
_ -> pure []
sepByEndOfLine1' :: A.Parser a
-> A.Parser [a]
sepByEndOfLine1' p = liftM2' (:) p loop
where
loop = do
mb <- A.peekChar
case mb of
Just b | b == cr ->
liftM2' (:) (A.anyChar *> A.char newline *> p) loop
| b == newline ->
liftM2' (:) (A.anyChar *> p) loop
_ -> pure []
row :: Char
-> A.Parser (Vector Text)
row !delim = rowNoNewline delim <* endOfLine
rowNoNewline :: Char
-> A.Parser (Vector Text)
rowNoNewline !delim = V.fromList <$!> field delim `sepByDelim1'` delim
field :: Char -> A.Parser Text
field !delim = do
mb <- A.peekChar
case mb of
Just b | b == doubleQuote -> escapedField
_ -> unescapedField delim
escapedField :: A.Parser Text
escapedField = do
_ <- dquote
b <- escapedFieldInner mempty
return (LText.toStrict (Builder.toLazyText b))
escapedFieldInner :: Builder -> A.Parser Builder
escapedFieldInner b = do
t <- A.takeTill (== doubleQuote)
_ <- A.anyChar
c <- A.peekChar'
if c == doubleQuote
then do
_ <- A.anyChar
escapedFieldInner (b `mappend` Builder.fromText t `mappend` Builder.fromText textDoubleQuote)
else return (b `mappend` Builder.fromText t)
unescapedField :: Char -> A.Parser Text
unescapedField !delim = A.takeWhile (\ c -> c /= doubleQuote &&
c /= newline &&
c /= delim &&
c /= cr)
dquote :: A.Parser Char
dquote = char doubleQuote
unescape :: A.Parser Text
unescape = (LText.toStrict . Builder.toLazyText) <$!> go mempty where
go acc = do
h <- A.takeWhile (/= doubleQuote)
let rest = do
c0 <- A.anyChar
c1 <- A.anyChar
if (c0 == doubleQuote && c1 == doubleQuote)
then go (acc `mappend` Builder.fromText h `mappend` Builder.fromText textDoubleQuote)
else fail "invalid CSV escape sequence"
done <- A.atEnd
if done
then return (acc `mappend` Builder.fromText h)
else rest
(<$!>) :: Monad m => (a -> b) -> m a -> m b
f <$!> m = do
a <- m
return $! f a
infixl 4 <$!>
liftM2' :: (Monad m) => (a -> b -> c) -> m a -> m b -> m c
liftM2' f a b = do
!x <- a
y <- b
return (f x y)
endOfLine :: A.Parser ()
endOfLine = (A.char newline *> return ()) <|> (string (Text.pack "\r\n") *> return ()) <|> (A.char cr *> return ())
textDoubleQuote :: Text
textDoubleQuote = Text.singleton doubleQuote
doubleQuote, newline, cr, comma :: Char
doubleQuote = '\"'
newline = '\n'
cr = '\r'
comma = ','