module Text.ParseCSV
( CSV
, parseCSV
) where
import Prelude hiding (concat, takeWhile)
import Control.Applicative ((<$>), (<|>), (<*>), (<*), (*>), many)
import Control.Monad (void)
import Data.Attoparsec.Text
import qualified Data.Text as T (Text, concat, cons, append)
-- | A parsed CSV document: a list of records, each record being a list of
-- field values.
type CSV = [[T.Text]]
-- | Consume a record terminator: either a lone line feed or a carriage
-- return immediately followed by a line feed.  Produces no value.
lineEnd :: Parser ()
lineEnd =
  endOfLine
    <?> "end of line"
-- | Parse a field that is not wrapped in double quotes.
--
-- Consumes the longest (possibly empty) run of characters that are none of
-- comma, line feed, carriage return, or double quote, and returns it.
-- Because the run may be empty, this parser always succeeds.
unquotedField :: Parser T.Text
unquotedField =
  takeWhile isFieldChar
    <?> "unquoted field"
  where
    -- Written as explicit comparisons rather than @`notElem` "..."@: with
    -- OverloadedStrings enabled (needed elsewhere in this module for
    -- 'string') and the Foldable-generalised 'notElem', the string literal's
    -- container type is ambiguous and the expression fails to type-check.
    isFieldChar c = c /= ',' && c /= '\n' && c /= '\r' && c /= '"'
-- | Parse the contents of a quoted field, stopping before the closing
-- double quote (which is not consumed).
--
-- A doubled double quote (@""@) inside the field is an escaped quote and
-- contributes a single @"@ to the result.
--
-- The body is read as a leading run of non-quote characters followed by
-- zero or more chunks, each chunk being one escaped quote plus the run of
-- non-quote characters after it.  Parsing the chunks with a flat 'many'
-- (instead of recursing into 'insideQuotes' for every escaped quote) avoids
-- re-copying the remaining text once per escaped quote.
insideQuotes :: Parser T.Text
insideQuotes =
  T.append <$> plainRun
           <*> (T.concat <$> many escapedChunk)
    <?> "inside of double quotes"
  where
    -- Longest (possibly empty) run of characters other than a double quote.
    plainRun = takeWhile (/= '"')
    -- One escaped quote followed by the plain text up to the next quote.
    -- Always consumes the two quote characters, so 'many' terminates.
    escapedChunk = T.cons <$> dquotes <*> plainRun
    -- Two consecutive double quotes, yielding a single quote character.
    dquotes =
      (string "\"\"" >> return '"')
        <?> "paired double quotes"
-- | Parse a field wrapped in double quotes and return its contents with
-- the surrounding quotes removed.
quotedField :: Parser T.Text
quotedField =
  quoted <?> "quoted field"
  where
    quoted = do
      _ <- char '"'            -- opening quote
      contents <- insideQuotes
      _ <- char '"'            -- closing quote
      return contents
-- | Parse a single field.  The quoted form is attempted first; if it does
-- not match, the unquoted form is used instead.
field :: Parser T.Text
field =
  (quotedField <|> unquotedField) <?> "field"
-- | Parse one record: one or more fields separated by commas.
record :: Parser [T.Text]
record =
  sepBy1 field (char ',') <?> "record"
-- | Parse a whole CSV document.
--
-- A first record is always read; after that, each further record must be
-- preceded by a line terminator.  Parsing stops at end of input, optionally
-- preceded by one final line terminator.
file :: Parser CSV
file =
  rows <?> "file"
  where
    rows = do
      firstRow <- record
      laterRows <- manyTill (lineEnd *> record) finish
      return (firstRow : laterRows)
    -- End of the document, with or without a trailing line terminator.
    finish = endOfInput <|> (lineEnd *> endOfInput)
-- | Parse CSV text into its records.
--
-- Returns 'Right' with the parsed records on success, or 'Left' with the
-- parser's error message on failure.
parseCSV :: T.Text -> Either String CSV
parseCSV input = parseOnly file input