module Data.CSV.Conduit
( CSVeable (..)
, CSVSettings (..)
, defCSVSettings
, MapRow
, Row
, readCSVFile
, mapCSVFile
) where
import Control.Applicative hiding (many)
import Control.Exception (bracket, SomeException)
import Control.Monad (mzero, mplus, foldM, when, liftM)
import Control.Monad.IO.Class (liftIO, MonadIO)
import Control.Monad.Trans.Control
import Data.Attoparsec as P hiding (take)
import qualified Data.Attoparsec.Char8 as C8
import qualified Data.ByteString as B
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as B8
import Data.ByteString.Internal (c2w)
import Data.Conduit as C
import Data.Conduit.Attoparsec
import Data.Conduit.Binary (sourceFile, sinkFile)
import qualified Data.Conduit.List as C
import Data.Conduit.Text
import qualified Data.Map as M
import Data.String
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Word (Word8)
import Safe (headMay)
import System.Directory
import System.PosixCompat.Files (getFileStatus, fileSize)
import qualified Data.CSV.Conduit.Parser.ByteString as BSP
import qualified Data.CSV.Conduit.Parser.Text as TP
import Data.CSV.Conduit.Types
-- | Relates a stream chunk type @s@ to a row representation @r@.
--
-- An instance says how rows of type @r@ are rendered to, parsed from and
-- streamed as values of type @s@.
class CSVeable s r where
  -- | Render a single row as a value of the stream type, using the output
  -- options carried by the given settings.
  rowToStr :: CSVSettings -> r -> s

  -- | A conduit that consumes stream values of type @s@ and produces rows
  -- of type @r@.
  intoCSV :: MonadResource m => CSVSettings -> Conduit s m r

  -- | A conduit that consumes rows of type @r@ and produces stream values
  -- of type @s@.
  fromCSV :: MonadResource m => CSVSettings -> Conduit r m s
-- | Rows whose fields are 'ByteString's, streamed as 'ByteString' chunks.
instance CSVeable ByteString (Row ByteString) where
  -- Join the fields with the output column separator. When an output quote
  -- character is configured, every field is wrapped in it and each
  -- occurrence of the quote character inside the field is doubled.
  rowToStr s !r = B.intercalate sep (map wrapField r)
    where
      sep = B.singleton (c2w (csvOutputColSep s))
      wrapField !f = case csvOutputQuoteChar s of
        Just !q -> B8.snoc (B8.cons q (escape q f)) q
        -- Match the constructor explicitly: a bare @otherwise@ in pattern
        -- position is only a fresh variable that shadows the Prelude guard.
        Nothing -> f
      -- Split on the quote character and re-join with two of them.
      escape c str = B8.intercalate (B8.pack [c, c]) $ B8.split c str

  -- Parse incoming chunks with the ByteString row parser.
  intoCSV set = intoCSVRow (BSP.row set)

  -- Render each row followed by a newline.
  fromCSV set = fromCSVRow set
-- | Rows whose fields are 'Text' values, streamed as 'Text' chunks.
instance CSVeable Text (Row Text) where
  -- Join the fields with the output column separator. When an output quote
  -- character is configured, every field is wrapped in it and each
  -- occurrence of the quote character inside the field is doubled.
  rowToStr s !r = T.intercalate sep (map wrapField r)
    where
      sep = T.singleton (csvOutputColSep s)
      wrapField !f = case csvOutputQuoteChar s of
        Just !q -> T.snoc (T.cons q (escape q f)) q
        -- Match the constructor explicitly: a bare @otherwise@ in pattern
        -- position is only a fresh variable that shadows the Prelude guard.
        Nothing -> f
      -- Split on the quote character and re-join with two of them.
      escape c str = T.intercalate (T.pack [c, c]) $ T.split (== c) str

  -- Parse incoming chunks with the Text row parser.
  intoCSV set = intoCSVRow (TP.row set)

  -- Render each row followed by a newline.
  fromCSV set = fromCSVRow set
-- | 'Text' rows carried over a 'ByteString' stream, converting with UTF-8 at
-- the boundary.
instance CSVeable ByteString (Row Text) where
  -- Render through the 'Text' instance, then encode the result as UTF-8.
  rowToStr s = T.encodeUtf8 . rowToStr s

  -- Parse as 'ByteString' rows first, then decode every field as UTF-8.
  intoCSV set = intoCSV set =$= C.map (map T.decodeUtf8)

  -- Render through the 'Text' instance, then encode each emitted piece.
  fromCSV set = fromCSV set =$= C.map T.encodeUtf8
-- | Conduit that renders every incoming row with 'rowToStr' and emits the
-- rendered row followed by a @"\n"@ element.
--
-- The conduit carries no meaningful state (unit) and emits nothing when the
-- upstream closes.
fromCSVRow set = conduitState () emitRow finish
  where
    -- One rendered row plus a newline per input row; the state is passed on
    -- untouched.
    emitRow st row = return (StateProducing st [rowToStr set row, "\n"])
    finish _ = return []
-- | Turn a parser into a conduit of rows.
--
-- The parser @p@ is run repeatedly against the incoming stream. Each run
-- yields a 'Maybe' value; 'Just' results are passed downstream and
-- 'Nothing' results are dropped.
intoCSVRow p = parser =$= puller
  where
    -- Run the parser over and over, emitting each result as it is produced.
    parser = sequenceSink () parseOne
    parseOne _ = do
      parsed <- sinkParser p
      return $ Emit () [parsed]
    -- Forward only the successfully parsed rows.
    puller = do
      next <- await
      case next of
        Nothing -> return ()
        Just (Just row) -> yield row >> puller
        Just Nothing -> puller
-- | Map-based rows, available whenever list-based rows of the same field
-- type are.
instance (CSVeable s (Row s'), Ord s', IsString s) => CSVeable s (MapRow s') where
  -- Render only the map's values, in the order 'M.elems' returns them.
  rowToStr s = rowToStr s . M.elems

  intoCSV = intoCSVMap

  fromCSV = fromCSVMap
-- | Parse a stream into map rows keyed by the header row.
--
-- The first non-empty row is remembered as the header and is not emitted.
-- Every later row is zipped with that header and emitted as a map.
intoCSVMap set = intoCSV set =$= conduitState Nothing step finish
  where
    -- No header yet: skip empty rows, capture the first non-empty one.
    step Nothing [] = return $ StateProducing Nothing []
    step Nothing hdr = return $ StateProducing (Just hdr) []
    -- Header known: pair it with the row's fields.
    step st@(Just hdr) fields = return $ StateProducing st [pairUp hdr fields]
    pairUp !ks !vs = M.fromList (zip ks vs)
    finish _ = return []
-- | Render map rows, writing a header line before the first row.
--
-- The boolean state records whether the header has been written. For the
-- first map, its keys are rendered as a line ahead of its values; every
-- map's values are rendered as a line followed by @"\n"@.
fromCSVMap set = conduitState False step finish
  where
    step headerWritten row = return $ StateProducing True output
      where
        dataLine = [rowToStr set (M.elems row), "\n"]
        output
          | headerWritten = dataLine
          | otherwise = rowToStr set (M.keys row) : "\n" : dataLine
    finish _ = return []
-- | Read a whole CSV file and return its rows as a list.
--
-- The file is streamed through 'intoCSV' with the given settings and all
-- resulting rows are collected with 'C.consume', so the complete result is
-- built up as a single list.
readCSVFile
  :: (MonadIO m, MonadUnsafeIO m, MonadThrow m,
      MonadBaseControl IO m, CSVeable ByteString a)
  => CSVSettings -- ^ settings passed to 'intoCSV'
  -> FilePath    -- ^ path of the file to read
  -> m [a]
readCSVFile set fp = runResourceT $ sourceFile fp $= intoCSV set $$ C.consume
-- | Transform a CSV file row by row.
--
-- Rows are parsed from the input file with 'intoCSV', passed through the
-- given function, rendered with 'fromCSV' and written to the output file.
-- The same settings are used for parsing and for rendering.
mapCSVFile
  :: (MonadIO m, MonadUnsafeIO m, MonadThrow m,
      MonadBaseControl IO m, CSVeable ByteString a, CSVeable ByteString b)
  => CSVSettings
  -> (a -> b)
  -> FilePath
  -> FilePath
  -> m ()
mapCSVFile set f fi fo =
  runResourceT (parsedRows $= C.map f $= fromCSV set $$ sinkFile fo)
  where
    -- Rows parsed from the input file.
    parsedRows = sourceFile fi $= intoCSV set
-- | Ad-hoc round trip: read @test/BigFile.csv@ as UTF-8 text, parse it into
-- map rows with the default settings, render the rows again and write the
-- UTF-8 encoded result to @test/BigFileOut.csv@.
test :: IO ()
test = runResourceT $
    sourceFile "test/BigFile.csv" $= decode utf8 $= parseRows
      $= fromCSV defCSVSettings $= encode utf8 $$ sinkFile "test/BigFileOut.csv"
  where
    -- Pin the row type so the right 'CSVeable' instance is selected.
    parseRows :: MonadResource m => Conduit Text m (MapRow Text)
    parseRows = intoCSV defCSVSettings