{-# LANGUAGE OverloadedStrings, BangPatterns #-}
{-# LANGUAGE PackageImports #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}

module Data.CSV.Conduit
    ( 
    
    -- * Fundamental Types
      CSV (..)
    , CSVSettings (..)
    , defCSVSettings
    , MapRow
    , Row
    -- * Convenience Functions
    , readCSVFile
    , transformCSV
    , mapCSVFile
    
    -- * Re-exported For Convenience
    , runResourceT
    ) where

-------------------------------------------------------------------------------
import           Control.Applicative        hiding (many)
import           Control.Exception          (bracket, SomeException)
import           Control.Monad              (mzero, mplus, foldM, when, liftM)
import           Control.Monad.IO.Class     (liftIO, MonadIO)
import           Control.Monad.Trans.Control
import           Data.Attoparsec            as P hiding (take)
import qualified Data.Attoparsec.Char8      as C8
import qualified Data.ByteString            as B
import           Data.ByteString.Char8      (ByteString)
import qualified Data.ByteString.Char8      as B8
import           Data.ByteString.Internal   (c2w)
import           Data.Conduit as C
import           Data.Conduit.Attoparsec
import           Data.Conduit.Binary (sourceFile, sinkFile)
import qualified Data.Conduit.List as C
import           Data.Conduit.Text
import qualified Data.Map                   as M
import           Data.String
import           Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import           Data.Word                  (Word8)
import           Safe                       (headMay)
import           System.Directory
import           System.PosixCompat.Files   (getFileStatus, fileSize)
-------------------------------------------------------------------------------
import qualified Data.CSV.Conduit.Parser.ByteString as BSP
import qualified Data.CSV.Conduit.Parser.Text as TP
import           Data.CSV.Conduit.Types
-------------------------------------------------------------------------------


-------------------------------------------------------------------------------
-- | Represents types 'r' that are CSV-like and can be converted
-- to/from an underlying stream of type 's'.
--
-- 
-- Example #1: Basics Using Convenience API
-- 
-- @
-- import Data.Conduit
-- import Data.Conduit.Binary
-- import Data.Conduit.List as CL
-- import Data.CSV.Conduit
--
-- myProcessor :: Conduit (Row Text) m (Row Text)
-- myProcessor = CL.map reverse
-- 
-- test = runResourceT $ 
--   transformCSV defCSVSettings 
--                (sourceFile "input.csv") 
--                myProcessor
--                (sinkFile "output.csv")
-- @
--
--
-- Example #2: Basics Using Conduit API
-- 
-- @
-- import Data.Conduit
-- import Data.Conduit.Binary
-- import Data.CSV.Conduit
--
-- myProcessor :: Conduit (Row Text) m (Row Text)
-- myProcessor = undefined
--
-- test = runResourceT $ 
--   sourceFile "test/BigFile.csv" $= 
--   intoCSV defCSVSettings $=
--   myProcessor $=
--   fromCSV defCSVSettings $$
--   sinkFile "test/BigFileOut.csv"
-- @
class CSV s r where

  -----------------------------------------------------------------------------
  -- | Convert a CSV row into strict ByteString equivalent.
  rowToStr :: CSVSettings -> r -> s

  -----------------------------------------------------------------------------
  -- | Turn a stream of 's' into a stream of CSV row type. An example
  -- would be parsing a ByteString stream as rows of 'MapRow' 'Text'.
  intoCSV :: MonadResource m => CSVSettings -> Conduit s m r

  -----------------------------------------------------------------------------
  -- | Turn a stream of CSV row type back into a stream of 's'. An
  -- example would be rendering a stream of 'Row' 'ByteString' rows as
  -- 'Text'.
  fromCSV :: MonadResource m => CSVSettings -> Conduit r m s


------------------------------------------------------------------------------
-- | 'Row' instance using 'ByteString'
instance CSV ByteString (Row ByteString) where
  rowToStr s !r = 
    let 
      sep = B.pack [c2w (csvOutputColSep s)] 
      wrapField !f = case (csvOutputQuoteChar s) of
        Just !x -> x `B8.cons` escape x f `B8.snoc` x
        otherwise -> f
      escape c str = B8.intercalate (B8.pack [c,c]) $ B8.split c str
    in B.intercalate sep . map wrapField $ r

  intoCSV set = intoCSVRow (BSP.row set)
  fromCSV set = fromCSVRow set


------------------------------------------------------------------------------
-- | 'Row' instance using 'Text'
instance CSV Text (Row Text) where
  rowToStr s !r = 
    let 
      sep = T.pack [(csvOutputColSep s)] 
      wrapField !f = case (csvOutputQuoteChar s) of
        Just !x -> x `T.cons` escape x f `T.snoc` x
        otherwise -> f
      escape c str = T.intercalate (T.pack [c,c]) $ T.split (== c) str
    in T.intercalate sep . map wrapField $ r

  intoCSV set = intoCSVRow (TP.row set)
  fromCSV set = fromCSVRow set


-------------------------------------------------------------------------------
-- | 'Row' instance using 'Text' based on 'ByteString' stream
instance CSV ByteString (Row Text) where
    rowToStr s r = T.encodeUtf8 $ rowToStr s r
    intoCSV set = intoCSV set =$= C.map (map T.decodeUtf8)
    fromCSV set = fromCSV set =$= C.map T.encodeUtf8



-------------------------------------------------------------------------------
fromCSVRow set = conduitState init push close
  where
    init = ()
    push st r = return $ StateProducing st [rowToStr set r, "\n"]
    close _ = return []


-------------------------------------------------------------------------------
intoCSVRow p = parser =$= puller
  where
    parser = sequenceSink () seqSink
    seqSink _ = do
      p <- sinkParser p
      return $ Emit () [p]
    puller = do
      inc <- await
      case inc of
        Nothing -> return ()
        Just i ->
          case i of
            Just i' -> yield i' >> puller
            Nothing -> puller



-------------------------------------------------------------------------------
-- | Generic 'MapRow' instance; any stream type with a 'Row' instance
-- automatically gets a 'MapRow' instance.
instance (CSV s (Row s'), Ord s', IsString s) => CSV s (MapRow s') where
  rowToStr s r = rowToStr s . M.elems $ r
  intoCSV set = intoCSVMap set
  fromCSV set = fromCSVMap set


-------------------------------------------------------------------------------
intoCSVMap set = intoCSV set =$= converter
  where
    converter = conduitState Nothing push close 
      where
        push Nothing row = 
          case row of
            [] -> return $ StateProducing Nothing []
            xs -> return $ StateProducing (Just xs) []
        push st@(Just hs) row = return $ StateProducing st [toMapCSV hs row]
        toMapCSV !headers !fs = M.fromList $ zip headers fs
        close _ = return []


-------------------------------------------------------------------------------
fromCSVMap set = conduitState False push close
  where
    push False r = return $ StateProducing True 
                   [rowToStr set (M.keys r), "\n", rowToStr set (M.elems r), "\n"]
    push True r = return $ StateProducing True
                   [rowToStr set (M.elems r), "\n"]
    close _ = return []



                          ---------------------------
                          -- Convenience Functions --
                          ---------------------------


-------------------------------------------------------------------------------
-- | Read the entire contents of a CSV file into memory.
--
-- An easy way to run this function would be 'runResourceT' after
-- feeding it all the arguments.
readCSVFile 
    :: (MonadResource m, CSV ByteString a) 
    => CSVSettings 
    -> FilePath 
    -- ^ Input file
    -> m [a]
readCSVFile set fp = sourceFile fp $= intoCSV set $$ C.consume


-------------------------------------------------------------------------------
-- | Map over the rows of a CSV file. Provided for convenience for
-- historical reasons.
--
-- An easy way to run this function would be 'runResourceT' after
-- feeding it all the arguments.
mapCSVFile 
    :: (MonadResource m, CSV ByteString a, CSV ByteString b) 
    => CSVSettings 
    -- ^ Settings to use both for input and output
    -> (a -> [b]) 
    -- ^ A mapping function
    -> FilePath 
    -- ^ Input file
    -> FilePath 
    -- ^ Output file
    -> m ()
mapCSVFile set f fi fo = 
  transformCSV set (sourceFile fi) (C.concatMap f) (sinkFile fo)


-------------------------------------------------------------------------------
-- | General purpose CSV transformer. Apply a list-like processing
-- function from 'Data.Conduit.List' to the rows of a CSV stream. You
-- need to provide a stream data source, a transformer and a stream
-- data sink.
--
-- An easy way to run this function would be 'runResourceT' after
-- feeding it all the arguments.
--
-- Example - map a function over the rows of a CSV file:
-- 
-- > transformCSV set (sourceFile inFile) (C.map f) (sinkFile outFile)
transformCSV 
    :: (MonadResource m, CSV s a, CSV s' b) 
    => CSVSettings 
    -- ^ Settings to be used for input and output
    -> Source m s
    -- ^ A raw stream data source. Ex: 'sourceFile inFile'
    -> Conduit a m b
    -- ^ A transforming conduit
    -> Sink s' m ()
    -- ^ A raw stream data sink. Ex: 'sinkFile outFile'
    -> m ()
transformCSV set source c sink = 
    source $=
    intoCSV set $=
    c $=
    fromCSV set $$
    sink
    

                               -----------------
                               -- Simple Test --
                               -----------------


test :: IO ()
test = runResourceT $ 
  sourceFile "test/BigFile.csv" $= 
  decode utf8 $=
  (intoCSV defCSVSettings 
    :: forall m. MonadResource m => Conduit Text m (MapRow Text)) $= 
  fromCSV defCSVSettings $=
  encode utf8 $$
  sinkFile "test/BigFileOut.csv"