{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TypeFamilies #-}

-- | A csv process based on attoparsec and the box library
module Box.Csv
  ( CsvConfig (..),
    defaultCsvConfig,
    Header (..),
    rowEmitter,
    rowCommitter,
    runCsv,

    -- * parsers
    sep,
    field_,
    field,
    skipField_,
    skipField,
    int,
    int',
    A.double,
    double',
    fields,
    ints,
    doubles,
    day',
    tod',
    localtime',
  )
where

import Box
import Control.Monad
import qualified Data.Attoparsec.Text as A
import Data.Functor.Contravariant
import Data.Text (Text, unpack)
import qualified Data.Text as Text
import Data.Time
import GHC.Generics

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Box
-- >>> import Box.Csv
-- >>> import qualified Data.Text as Text
-- >>> import qualified Data.Attoparsec.Text as A

-- | csv file configuration
data CsvConfig = CsvConfig
  { -- | file name
    CsvConfig -> FilePath
file :: FilePath,
    -- | field separator
    CsvConfig -> Char
fsep :: Char,
    -- | nature of header row(s)
    CsvConfig -> Header
header :: Header
  }
  deriving (Int -> CsvConfig -> ShowS
[CsvConfig] -> ShowS
CsvConfig -> FilePath
(Int -> CsvConfig -> ShowS)
-> (CsvConfig -> FilePath)
-> ([CsvConfig] -> ShowS)
-> Show CsvConfig
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [CsvConfig] -> ShowS
$cshowList :: [CsvConfig] -> ShowS
show :: CsvConfig -> FilePath
$cshow :: CsvConfig -> FilePath
showsPrec :: Int -> CsvConfig -> ShowS
$cshowsPrec :: Int -> CsvConfig -> ShowS
Show, (forall x. CsvConfig -> Rep CsvConfig x)
-> (forall x. Rep CsvConfig x -> CsvConfig) -> Generic CsvConfig
forall x. Rep CsvConfig x -> CsvConfig
forall x. CsvConfig -> Rep CsvConfig x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep CsvConfig x -> CsvConfig
$cfrom :: forall x. CsvConfig -> Rep CsvConfig x
Generic, CsvConfig -> CsvConfig -> Bool
(CsvConfig -> CsvConfig -> Bool)
-> (CsvConfig -> CsvConfig -> Bool) -> Eq CsvConfig
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CsvConfig -> CsvConfig -> Bool
$c/= :: CsvConfig -> CsvConfig -> Bool
== :: CsvConfig -> CsvConfig -> Bool
$c== :: CsvConfig -> CsvConfig -> Bool
Eq)

-- | default csv file details
--
-- >>> defaultCsvConfig
-- CsvConfig {file = "./other/time_series_covid19_deaths_global_narrow.csv", fsep = ',', header = HasHXL}
--
-- test data from https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases
defaultCsvConfig :: CsvConfig
defaultCsvConfig :: CsvConfig
defaultCsvConfig =
  FilePath -> Char -> Header -> CsvConfig
CsvConfig
    FilePath
"./other/time_series_covid19_deaths_global_narrow.csv"
    Char
','
    Header
HasHXL

-- | Type of header rows.  Note the modern propensity for multiple header rows.
data Header = HasHeader | HasHXL | NoHeader deriving (Int -> Header -> ShowS
[Header] -> ShowS
Header -> FilePath
(Int -> Header -> ShowS)
-> (Header -> FilePath) -> ([Header] -> ShowS) -> Show Header
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [Header] -> ShowS
$cshowList :: [Header] -> ShowS
show :: Header -> FilePath
$cshow :: Header -> FilePath
showsPrec :: Int -> Header -> ShowS
$cshowsPrec :: Int -> Header -> ShowS
Show, Header -> Header -> Bool
(Header -> Header -> Bool)
-> (Header -> Header -> Bool) -> Eq Header
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Header -> Header -> Bool
$c/= :: Header -> Header -> Bool
== :: Header -> Header -> Bool
$c== :: Header -> Header -> Bool
Eq)

-- | attoparsec parse emitter which returns the original text on failure
parseE :: (Functor m) => A.Parser a -> Emitter m Text -> Emitter m (Either Text a)
parseE :: Parser a -> Emitter m Text -> Emitter m (Either Text a)
parseE Parser a
parser Emitter m Text
e = (\Text
t -> (FilePath -> Either Text a)
-> (a -> Either Text a) -> Either FilePath a -> Either Text a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Either Text a -> FilePath -> Either Text a
forall a b. a -> b -> a
const (Either Text a -> FilePath -> Either Text a)
-> Either Text a -> FilePath -> Either Text a
forall a b. (a -> b) -> a -> b
$ Text -> Either Text a
forall a b. a -> Either a b
Left Text
t) a -> Either Text a
forall a b. b -> Either a b
Right (Parser a -> Text -> Either FilePath a
forall a. Parser a -> Text -> Either FilePath a
A.parseOnly Parser a
parser Text
t)) (Text -> Either Text a)
-> Emitter m Text -> Emitter m (Either Text a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Emitter m Text
e

-- | A continuation emitter of parsed csv rows from a CsvConfig, returning the original text on failure
-- >>> emit <$|> rowEmitter defaultCsvConfig fields
-- Just (Right ["Province/State","Country/Region","Lat","Long","Date","Value","ISO 3166-1 Alpha 3-Codes","Region Code","Sub-region Code","Intermediate Region Code\r"])
rowEmitter :: CsvConfig -> (Char -> A.Parser a) -> CoEmitter IO (Either Text a)
rowEmitter :: CsvConfig -> (Char -> Parser a) -> CoEmitter IO (Either Text a)
rowEmitter CsvConfig
cfg Char -> Parser a
p = Parser a -> Emitter IO Text -> Emitter IO (Either Text a)
forall (m :: * -> *) a.
Functor m =>
Parser a -> Emitter m Text -> Emitter m (Either Text a)
parseE (Char -> Parser a
p (CsvConfig -> Char
fsep CsvConfig
cfg)) (Emitter IO Text -> Emitter IO (Either Text a))
-> Codensity IO (Emitter IO Text) -> CoEmitter IO (Either Text a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> Codensity IO (Emitter IO Text)
fileE (CsvConfig -> FilePath
file CsvConfig
cfg)

-- | commits printed csv rows
--
-- >>> let testConfig = CsvConfig "./test/test.csv" ',' NoHeader
-- >>> let ctest = rowCommitter testConfig (fmap (Text.intercalate "," . fmap (Text.pack . show)))
--
-- >>> (\c -> commit c [[1..10::Int]]) <$|> ctest
-- True
--
-- >>> emit <$|> rowEmitter testConfig ints
-- Just (Right [1,2,3,4,5,6,7,8,9,10])
rowCommitter :: CsvConfig -> (a -> [Text]) -> CoCommitter IO a
rowCommitter :: CsvConfig -> (a -> [Text]) -> CoCommitter IO a
rowCommitter CsvConfig
cfg a -> [Text]
f = (a -> Text) -> Committer IO Text -> Committer IO a
forall (f :: * -> *) a b. Contravariant f => (a -> b) -> f b -> f a
contramap (Text -> [Text] -> Text
Text.intercalate (Char -> Text
Text.singleton (Char -> Text) -> Char -> Text
forall a b. (a -> b) -> a -> b
$ CsvConfig -> Char
fsep CsvConfig
cfg) ([Text] -> Text) -> (a -> [Text]) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> [Text]
f) (Committer IO Text -> Committer IO a)
-> Codensity IO (Committer IO Text) -> CoCommitter IO a
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> FilePath -> Codensity IO (Committer IO Text)
fileWriteC (CsvConfig -> FilePath
file CsvConfig
cfg)

-- | Run a parser across all lines of a file.
--
-- >>> r1 <- runCsv defaultCsvConfig fields
-- >>> length r1
-- 42562
--
-- >>> length [x | (Left x) <- r1]
-- 0
--
-- >>> take 2 $ drop 2 [x | (Right x) <- r1]
-- [["","Afghanistan","33.0","65.0","2020-06-29","733","AFG","142","34","\r"],["","Afghanistan","33.0","65.0","2020-06-28","721","AFG","142","34","\r"]]
runCsv :: CsvConfig -> (Char -> A.Parser a) -> IO [Either Text a]
runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a]
runCsv CsvConfig
cfg Char -> Parser a
p = Emitter IO (Either Text a) -> IO [Either Text a]
forall (m :: * -> *) a. Monad m => Emitter m a -> m [a]
toListM (Emitter IO (Either Text a) -> IO [Either Text a])
-> Codensity IO (Emitter IO (Either Text a)) -> IO [Either Text a]
forall a (m :: * -> *) r. (a -> m r) -> Codensity m a -> m r
<$|> CsvConfig
-> (Char -> Parser a) -> Codensity IO (Emitter IO (Either Text a))
forall a.
CsvConfig -> (Char -> Parser a) -> CoEmitter IO (Either Text a)
rowEmitter CsvConfig
cfg Char -> Parser a
p

-- * low-level generic csv parser helpers

-- | Most parsing and building routines implicity assume a character acting as a separator of fields, and newlines separating rows.
--
-- >>> A.parse (sep ',') ",ok"
-- Done "ok" ()
sep :: Char -> A.Parser ()
sep :: Char -> Parser ()
sep Char
c = Parser Text Char -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser Text Char
A.char Char
c)

-- * single field parsers

-- | an unquoted field
-- Does not consume the separator token
--
-- >>> A.parse (field_ ',') "field,ok"
-- Done ",ok" "field"
field_ :: Char -> A.Parser Text
field_ :: Char -> Parser Text
field_ Char
c = (Char -> Bool) -> Parser Text
A.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c)

-- | an unquoted field
-- Consume the separator token
--
-- >>> A.parse (field ',') "field,ok"
-- Done "ok" "field"
field :: Char -> A.Parser Text
field :: Char -> Parser Text
field Char
c = (Char -> Bool) -> Parser Text
A.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c) Parser Text -> Parser Text Char -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
c

-- | skipping a field
--
-- >>> A.parse (skipField_ ',') "field,ok"
-- Done ",ok" ()
skipField_ :: Char -> A.Parser ()
skipField_ :: Char -> Parser ()
skipField_ Char
c = (Char -> Bool) -> Parser ()
A.skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c)

-- | skipping a field
--
-- >>> A.parse (skipField ',') "field,ok"
-- Done "ok" ()
skipField :: Char -> A.Parser ()
skipField :: Char -> Parser ()
skipField Char
c = (Char -> Bool) -> Parser ()
A.skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c) Parser () -> Parser Text Char -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
','

-- | int parser
--
-- >>> A.parse int "234,ok"
-- Done ",ok" 234
int :: A.Parser Int
int :: Parser Int
int = Parser Int
forall a. Integral a => Parser a
A.decimal

-- | int parser, consumes separator
--
-- >>> A.parse (int' ',') "234,ok"
-- Done "ok" 234
int' :: Char -> A.Parser Int
int' :: Char -> Parser Int
int' Char
c = Parser Int
forall a. Integral a => Parser a
A.decimal Parser Int -> Parser Text Char -> Parser Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
c

-- | double parser, consumes separator
--
-- >>> A.parse (double' ',') "234.000,ok"
-- Done "ok" 234.0
double' :: Char -> A.Parser Double
double' :: Char -> Parser Double
double' Char
c = Parser Double
A.double Parser Double -> Parser Text Char -> Parser Double
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
c

-- | Day parser, consumes separator
--
-- >>> A.parse (day' ',') "2020-07-01,ok"
-- Done "ok" 2020-07-01
day' :: Char -> A.Parser Day
day' :: Char -> Parser Day
day' Char
c = do
  Text
d <- (Char -> Bool) -> Parser Text
A.takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)
  Day
d' <- Bool -> TimeLocale -> FilePath -> FilePath -> Parser Day
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> FilePath -> FilePath -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale FilePath
"%F" (Text -> FilePath
unpack Text
d)
  Char
_ <- Char -> Parser Text Char
A.char Char
c
  Day -> Parser Day
forall (f :: * -> *) a. Applicative f => a -> f a
pure Day
d'

-- | TimeOfDay parser, consumes separator
--
-- >>> A.parse (tod' ',') "23:52:05.221109,ok"
-- Done "ok" 23:52:05.221109
tod' :: Char -> A.Parser TimeOfDay
tod' :: Char -> Parser TimeOfDay
tod' Char
c = do
  Text
d <- (Char -> Bool) -> Parser Text
A.takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)
  TimeOfDay
d' <- Bool -> TimeLocale -> FilePath -> FilePath -> Parser TimeOfDay
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> FilePath -> FilePath -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale FilePath
"%T%Q" (Text -> FilePath
unpack Text
d)
  Char
_ <- Char -> Parser Text Char
A.char Char
c
  TimeOfDay -> Parser TimeOfDay
forall (f :: * -> *) a. Applicative f => a -> f a
pure TimeOfDay
d'

-- | TimeOfDay parser, consumes separator
--
-- >>> A.parse (localtime' ',') "Jun 24 8:24AM,ok"
-- Done "ok" 2020-06-24 08:24:00
localtime' :: Char -> A.Parser LocalTime
localtime' :: Char -> Parser LocalTime
localtime' Char
c = do
  Text
d <- (Char -> Bool) -> Parser Text
A.takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)
  LocalTime
d' <- Bool -> TimeLocale -> FilePath -> FilePath -> Parser LocalTime
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> FilePath -> FilePath -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale FilePath
"%Y %b %e %k:%M%p" (Text -> FilePath
unpack (Text
"2020 " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
d))
  Char
_ <- Char -> Parser Text Char
A.char Char
c
  LocalTime -> Parser LocalTime
forall (f :: * -> *) a. Applicative f => a -> f a
pure LocalTime
d'

-- * Block list parsers

-- | Parser for a csv row of [Text].
--
-- >>> A.parseOnly (fields ',') "field1,field2\r"
-- Right ["field1","field2\r"]
fields :: Char -> A.Parser [Text]
fields :: Char -> Parser [Text]
fields Char
c =
  Char -> Parser Text
field_ Char
c Parser Text -> Parser () -> Parser [Text]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser ()
sep Char
c

-- | parser for a csv row of [Double]
--
-- >>> A.parseOnly (doubles ',') "1,2,3"
-- Right [1.0,2.0,3.0]
doubles :: Char -> A.Parser [Double]
doubles :: Char -> Parser [Double]
doubles Char
c = Parser Double
A.double Parser Double -> Parser () -> Parser [Double]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser ()
sep Char
c

-- | parser for a csv row of [Int]
--
-- >>> A.parseOnly (ints ',') "1,2,3"
-- Right [1,2,3]
ints :: Char -> A.Parser [Int]
ints :: Char -> Parser [Int]
ints Char
c = Parser Int -> Parser Int
forall a. Num a => Parser a -> Parser a
A.signed Parser Int
forall a. Integral a => Parser a
A.decimal Parser Int -> Parser () -> Parser [Int]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser ()
sep Char
c