{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  $module
-- Copyright   :  (c) Laurent P. René de Cotret
-- License     :  MIT
-- Maintainer  :  laurent.decotret@outlook.com
-- Portability :  portable
--
-- This module contains functions to serialize/deserialize generic 'Series'
-- to/from bytes.
--
-- Use this module if you want to support all types of 'Series'. Otherwise,
-- you should use either the modules "Data.Series.IO" or "Data.Series.Unboxed.IO". 
module Data.Series.Generic.IO (
    -- * Deserialize 'Series'
    readCSV,
    readCSVFromFile,

    -- * Serialize 'Series'
    writeCSV,
    writeCSVToFile,
) where


import           Control.Monad          ( forM )
import           Control.Monad.IO.Class ( MonadIO(liftIO) )
import qualified Data.ByteString        as BS
import qualified Data.ByteString.Lazy   as BL
import           Data.Csv               ( FromNamedRecord(..), ToNamedRecord(..), )
import qualified Data.Csv               as CSV
import           Data.Functor           ( (<&>) )
import qualified Data.HashMap.Strict    as HashMap
import qualified Data.List.NonEmpty     as NE
import           Data.Maybe             ( fromMaybe )
import           Data.Series.Generic    ( Series, fromVector, convert )
import qualified Data.Series.Generic    as GSeries
import qualified Data.Vector            as Boxed
import           Data.Vector.Generic    ( Vector )
import qualified System.IO              as IO


{-|
Read a comma-separated value (CSV) bytestream into a series.

Consider the following bytestream read from a file:

@
latitude,longitude,city
48.856667,2.352222,Paris
40.712778,-74.006111,New York City
25.0375,121.5625,Taipei
-34.603333,-58.381667,Buenos Aires
@

We want to get a series of the latitude an longitude, indexed by the column "city". First, we need
to do is to create a datatype representing the latitude and longitude information, and our index:

@
data LatLong = MkLatLong { latitude  :: Double
                         , longitude :: Double
                         }
    deriving ( Show )

newtype City = MkCity String
    deriving ( Eq, Ord, Show )
@

Second, we need to create an instance of `Data.Csv.FromNamedRecord` for our new types:

@
import "Data.Csv" ( 'FromNamedRecord', '(.:)' )

instance 'FromNamedRecord' LatLong where
    'parseNamedRecord' r = MkLatLong \<$\> r .: "latitude"
                                   \<*\> r .: "longitude"


instance 'FromNamedRecord' City where
    'parseNamedRecord' r = MkCity \<$\> r .: "city"
@

Finally, we're ready to read our stream:

@
import "Data.Series.Generic"
import "Data.Series.Generic.IO"
import "Data.Vector" 

main :: IO ()
main = do
    stream <- (...) -- Read the bytestring from somewhere
    let (latlongs  :: 'Series' Vector City LatLong) = either error id (`readCSV` stream)
    print latlongs
@
-}
readCSV :: (Vector v a, Ord k, FromNamedRecord k, FromNamedRecord a)
        => BL.ByteString
        -> Either String (Series v k a)
readCSV :: forall (v :: * -> *) a k.
(Vector v a, Ord k, FromNamedRecord k, FromNamedRecord a) =>
ByteString -> Either String (Series v k a)
readCSV ByteString
bytes = do
    (Header
_, Vector NamedRecord
records :: Boxed.Vector CSV.NamedRecord) <- ByteString -> Either String (Header, Vector NamedRecord)
forall a.
FromNamedRecord a =>
ByteString -> Either String (Header, Vector a)
CSV.decodeByName ByteString
bytes

    Vector (k, a)
rows <- Parser (Vector (k, a)) -> Either String (Vector (k, a))
forall a. Parser a -> Either String a
CSV.runParser (Parser (Vector (k, a)) -> Either String (Vector (k, a)))
-> Parser (Vector (k, a)) -> Either String (Vector (k, a))
forall a b. (a -> b) -> a -> b
$ Vector NamedRecord
-> (NamedRecord -> Parser (k, a)) -> Parser (Vector (k, a))
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM Vector NamedRecord
records
                          ((NamedRecord -> Parser (k, a)) -> Parser (Vector (k, a)))
-> (NamedRecord -> Parser (k, a)) -> Parser (Vector (k, a))
forall a b. (a -> b) -> a -> b
$ \NamedRecord
record -> (,) (k -> a -> (k, a)) -> Parser k -> Parser (a -> (k, a))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> NamedRecord -> Parser k
forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord NamedRecord
record
                                           Parser (a -> (k, a)) -> Parser a -> Parser (k, a)
forall a b. Parser (a -> b) -> Parser a -> Parser b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> NamedRecord -> Parser a
forall a. FromNamedRecord a => NamedRecord -> Parser a
parseNamedRecord NamedRecord
record

    Series v k a -> Either String (Series v k a)
forall a. a -> Either String a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Series v k a -> Either String (Series v k a))
-> Series v k a -> Either String (Series v k a)
forall a b. (a -> b) -> a -> b
$ Series Vector k a -> Series v k a
forall (v1 :: * -> *) a (v2 :: * -> *) k.
(Vector v1 a, Vector v2 a) =>
Series v1 k a -> Series v2 k a
convert (Series Vector k a -> Series v k a)
-> Series Vector k a -> Series v k a
forall a b. (a -> b) -> a -> b
$ Vector (k, a) -> Series Vector k a
forall k (v :: * -> *) a.
(Ord k, Vector v k, Vector v a, Vector v (k, a)) =>
v (k, a) -> Series v k a
fromVector Vector (k, a)
rows


fromFile :: MonadIO m 
         => FilePath
         -> (BL.ByteString -> Either String b)
         -> m (Either String b)
fromFile :: forall (m :: * -> *) b.
MonadIO m =>
String -> (ByteString -> Either String b) -> m (Either String b)
fromFile String
fp ByteString -> Either String b
f
    = IO (Either String b) -> m (Either String b)
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either String b) -> m (Either String b))
-> IO (Either String b) -> m (Either String b)
forall a b. (a -> b) -> a -> b
$ String
-> IOMode
-> (Handle -> IO (Either String b))
-> IO (Either String b)
forall r. String -> IOMode -> (Handle -> IO r) -> IO r
IO.withFile String
fp IOMode
IO.ReadMode ((Handle -> IO (Either String b)) -> IO (Either String b))
-> (Handle -> IO (Either String b)) -> IO (Either String b)
forall a b. (a -> b) -> a -> b
$ \Handle
h -> do
        Handle -> Bool -> IO ()
IO.hSetBinaryMode Handle
h Bool
True
        Handle -> IO ByteString
BS.hGetContents Handle
h IO ByteString
-> (ByteString -> Either String b) -> IO (Either String b)
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> ByteString -> Either String b
f (ByteString -> Either String b)
-> (ByteString -> ByteString) -> ByteString -> Either String b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BL.fromStrict


{-|
This is a helper function to read a CSV directly from a filepath.
See the documentation for 'readCSV' on how to prepare your types.
Then, for example, you can use 'readCSVFromFile' as:

@
import "Data.Series.Generic"
import "Data.Series.Generic.IO"
import "Data.Vector"

main :: IO ()
main = do
    let (latlongs  :: 'Series' Vector City LatLong) = either error id \<$\> `readCSVFromFile` "somefile.csv"
    print latlongs
@
-}
readCSVFromFile :: (MonadIO m, Vector v a, Ord k, FromNamedRecord k, FromNamedRecord a)
                => FilePath
                -> m (Either String (Series v k a))
readCSVFromFile :: forall (m :: * -> *) (v :: * -> *) a k.
(MonadIO m, Vector v a, Ord k, FromNamedRecord k,
 FromNamedRecord a) =>
String -> m (Either String (Series v k a))
readCSVFromFile String
fp = String
-> (ByteString -> Either String (Series v k a))
-> m (Either String (Series v k a))
forall (m :: * -> *) b.
MonadIO m =>
String -> (ByteString -> Either String b) -> m (Either String b)
fromFile String
fp ByteString -> Either String (Series v k a)
forall (v :: * -> *) a k.
(Vector v a, Ord k, FromNamedRecord k, FromNamedRecord a) =>
ByteString -> Either String (Series v k a)
readCSV 


-- | Serialize a 'Series' to bytes. 
writeCSV :: (Vector v a, ToNamedRecord k, ToNamedRecord a)
         => Series v k a
         -> BL.ByteString
writeCSV :: forall (v :: * -> *) a k.
(Vector v a, ToNamedRecord k, ToNamedRecord a) =>
Series v k a -> ByteString
writeCSV Series v k a
xs = ByteString -> Maybe ByteString -> ByteString
forall a. a -> Maybe a -> a
fromMaybe ByteString
forall a. Monoid a => a
mempty (Maybe ByteString -> ByteString) -> Maybe ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ do
    NonEmpty NamedRecord
recs   <- [NamedRecord] -> Maybe (NonEmpty NamedRecord)
forall a. [a] -> Maybe (NonEmpty a)
NE.nonEmpty [ k -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
toNamedRecord k
k NamedRecord -> NamedRecord -> NamedRecord
forall a. Semigroup a => a -> a -> a
<> a -> NamedRecord
forall a. ToNamedRecord a => a -> NamedRecord
toNamedRecord a
v | (k
k, a
v) <- Series v k a -> [(k, a)]
forall (v :: * -> *) a k. Vector v a => Series v k a -> [(k, a)]
GSeries.toList Series v k a
xs]
    let header :: Header
header =  [ByteString] -> Header
CSV.header ([ByteString] -> Header) -> [ByteString] -> Header
forall a b. (a -> b) -> a -> b
$ NamedRecord -> [ByteString]
forall k v. HashMap k v -> [k]
HashMap.keys (NamedRecord -> [ByteString]) -> NamedRecord -> [ByteString]
forall a b. (a -> b) -> a -> b
$ NonEmpty NamedRecord -> NamedRecord
forall a. NonEmpty a -> a
NE.head NonEmpty NamedRecord
recs
    ByteString -> Maybe ByteString
forall a. a -> Maybe a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ Header -> [NamedRecord] -> ByteString
forall a. ToNamedRecord a => Header -> [a] -> ByteString
CSV.encodeByName Header
header ([NamedRecord] -> ByteString) -> [NamedRecord] -> ByteString
forall a b. (a -> b) -> a -> b
$ NonEmpty NamedRecord -> [NamedRecord]
forall a. NonEmpty a -> [a]
NE.toList NonEmpty NamedRecord
recs


-- | This is a helper function to write a 'Series' directly to a file.
writeCSVToFile :: (MonadIO m, Vector v a, ToNamedRecord k, ToNamedRecord a)
               => FilePath
               -> Series v k a
               -> m ()
writeCSVToFile :: forall (m :: * -> *) (v :: * -> *) a k.
(MonadIO m, Vector v a, ToNamedRecord k, ToNamedRecord a) =>
String -> Series v k a -> m ()
writeCSVToFile String
fp 
    = IO () -> m ()
forall a. IO a -> m a
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO 
    (IO () -> m ()) -> (Series v k a -> IO ()) -> Series v k a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString -> IO ()
BS.writeFile String
fp 
    (ByteString -> IO ())
-> (Series v k a -> ByteString) -> Series v k a -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BL.toStrict 
    (ByteString -> ByteString)
-> (Series v k a -> ByteString) -> Series v k a -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Series v k a -> ByteString
forall (v :: * -> *) a k.
(Vector v a, ToNamedRecord k, ToNamedRecord a) =>
Series v k a -> ByteString
writeCSV