module Data.Csv.Encoding
(
decode
, decodeByName
, encode
, encodeByName
, DecodeOptions(..)
, defaultDecodeOptions
, decodeWith
, decodeByNameWith
, EncodeOptions(..)
, defaultEncodeOptions
, encodeWith
, encodeByNameWith
) where
import Blaze.ByteString.Builder (Builder, fromByteString, fromWord8,
toLazyByteString)
import Blaze.ByteString.Builder.Char8 (fromString)
import Control.Applicative ((*>), (<|>), optional, pure)
import Data.Attoparsec.Char8 (endOfInput, endOfLine)
import qualified Data.Attoparsec.ByteString.Lazy as AL
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as BL8
import qualified Data.HashMap.Strict as HM
import Data.Monoid (mconcat, mempty)
import Data.Vector (Vector)
import qualified Data.Vector as V
import Data.Word (Word8)
import Prelude hiding (unlines)
import Data.Csv.Compat.Monoid ((<>))
import Data.Csv.Conversion (FromNamedRecord, FromRecord, ToNamedRecord,
ToRecord, parseNamedRecord, parseRecord, runParser,
toNamedRecord, toRecord)
import Data.Csv.Parser hiding (csv, csvWithHeader)
import qualified Data.Csv.Parser as Parser
import Data.Csv.Types hiding (toNamedRecord)
import qualified Data.Csv.Types as Types
import Data.Csv.Util (blankLine)
decode :: FromRecord a
=> Bool
-> L.ByteString
-> Either String (Vector a)
decode = decodeWith defaultDecodeOptions
decodeByName :: FromNamedRecord a
=> L.ByteString
-> Either String (Header, Vector a)
decodeByName = decodeByNameWith defaultDecodeOptions
encode :: ToRecord a => V.Vector a -> L.ByteString
encode = encodeWith defaultEncodeOptions
encodeByName :: ToNamedRecord a => Header -> V.Vector a -> L.ByteString
encodeByName = encodeByNameWith defaultEncodeOptions
decodeWith :: FromRecord a
=> DecodeOptions
-> Bool
-> L.ByteString
-> Either String (Vector a)
decodeWith = decodeWithC csv
idDecodeWith :: DecodeOptions -> Bool -> L.ByteString
-> Either String (Vector (Vector B.ByteString))
idDecodeWith = decodeWithC Parser.csv
decodeWithC :: (DecodeOptions -> AL.Parser a) -> DecodeOptions -> Bool
-> BL8.ByteString -> Either String a
decodeWithC p !opts skipHeader = decodeWithP parser
where parser
| skipHeader = header (decDelimiter opts) *> p opts
| otherwise = p opts
decodeByNameWith :: FromNamedRecord a
=> DecodeOptions
-> L.ByteString
-> Either String (Header, Vector a)
decodeByNameWith !opts = decodeWithP (csvWithHeader opts)
data EncodeOptions = EncodeOptions
{
encDelimiter :: !Word8
} deriving (Eq, Show)
defaultEncodeOptions :: EncodeOptions
defaultEncodeOptions = EncodeOptions
{ encDelimiter = 44
}
encodeWith :: ToRecord a => EncodeOptions -> V.Vector a -> L.ByteString
encodeWith opts = toLazyByteString
. unlines
. map (encodeRecord (encDelimiter opts) . toRecord)
. V.toList
encodeRecord :: Word8 -> Record -> Builder
encodeRecord delim = mconcat . intersperse (fromWord8 delim)
. map fromByteString . map escape . V.toList
escape :: B.ByteString -> B.ByteString
escape s
| B.find (\ b -> b == dquote || b == comma || b == nl || b == cr ||
b == sp) s == Nothing = s
| otherwise =
B.concat ["\"",
B.concatMap
(\ b -> if b == dquote then "\"\"" else B.singleton b) s,
"\""]
where
dquote = 34
comma = 44
nl = 10
cr = 13
sp = 32
encodeByNameWith :: ToNamedRecord a => EncodeOptions -> Header -> V.Vector a
-> L.ByteString
encodeByNameWith opts hdr v =
toLazyByteString ((encodeRecord (encDelimiter opts) hdr) <>
fromByteString "\r\n" <> records)
where
records = unlines
. map (encodeRecord (encDelimiter opts)
. namedRecordToRecord hdr . toNamedRecord)
. V.toList $ v
namedRecordToRecord :: Header -> NamedRecord -> Record
namedRecordToRecord hdr nr = V.map find hdr
where
find n = case HM.lookup n nr of
Nothing -> moduleError "namedRecordToRecord" $
"header contains name " ++ show (B8.unpack n) ++
" which is not present in the named record"
Just v -> v
moduleError :: String -> String -> a
moduleError func msg = error $ "Data.Csv.Encoding." ++ func ++ ": " ++ msg
unlines :: [Builder] -> Builder
unlines [] = mempty
unlines (b:bs) = b <> fromString "\r\n" <> unlines bs
intersperse :: Builder -> [Builder] -> [Builder]
intersperse _ [] = []
intersperse sep (x:xs) = x : prependToAll sep xs
prependToAll :: Builder -> [Builder] -> [Builder]
prependToAll _ [] = []
prependToAll sep (x:xs) = sep <> x : prependToAll sep xs
decodeWithP :: AL.Parser a -> L.ByteString -> Either String a
decodeWithP p s =
case AL.parse p s of
AL.Done _ v -> Right v
AL.Fail left _ msg -> Left $ "parse error (" ++ msg ++ ") at " ++
show (BL8.unpack left)
csv :: FromRecord a => DecodeOptions -> AL.Parser (V.Vector a)
csv !opts = do
vals <- records
_ <- optional endOfLine
endOfInput
return $! V.fromList vals
where
records = do
!r <- record (decDelimiter opts)
if blankLine r
then (endOfLine *> records) <|> pure []
else case runParser (parseRecord r) of
Left msg -> fail $ "conversion error: " ++ msg
Right val -> do
!vals <- (endOfLine *> records) <|> pure []
return (val : vals)
csvWithHeader :: FromNamedRecord a => DecodeOptions
-> AL.Parser (Header, V.Vector a)
csvWithHeader !opts = do
!hdr <- header (decDelimiter opts)
vals <- records hdr
_ <- optional endOfLine
endOfInput
let !v = V.fromList vals
return (hdr, v)
where
records hdr = do
!r <- record (decDelimiter opts)
if blankLine r
then (endOfLine *> records hdr) <|> pure []
else case runParser (convert hdr r) of
Left msg -> fail $ "conversion error: " ++ msg
Right val -> do
!vals <- (endOfLine *> records hdr) <|> pure []
return (val : vals)
convert hdr = parseNamedRecord . Types.toNamedRecord hdr