{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedLabels #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE StrictData #-}
{-# LANGUAGE TypeFamilies #-}

-- | A csv process based on attoparsec and the box library
module Box.Csv
  ( CsvConfig (..),
    defaultCsvConfig,
    file,
    Header (..),
    rowEmitter,
    rowCommitter,
    runCsv,

    -- * parsers
    sep,
    field_,
    field,
    skipField_,
    skipField,
    int,
    int',
    A.double,
    double',
    fields,
    scis,
    ints,
    doubles,
    day',
    tod',
    localtime',
  )
where

import Box
import Control.Lens
import Control.Monad
import qualified Data.Attoparsec.Text as A
import Data.Generics.Labels ()
import Data.Scientific
import Data.Text (Text, unpack)
import qualified Data.Text as Text
import Data.Time
import GHC.Generics

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Box
-- >>> import Box.Csv
-- >>> import qualified Data.Text as Text
-- >>> import qualified Data.Attoparsec.Text as A

-- | csv file configuration
data CsvConfig = CsvConfig
  { -- | file name stem
    CsvConfig -> Text
name :: Text,
    -- | file suffix
    CsvConfig -> Text
suffix :: Text,
    -- | directory
    CsvConfig -> Text
dir :: Text,
    -- | field separator
    CsvConfig -> Char
fsep :: Char,
    -- | nature of header row(s)
    CsvConfig -> Header
header :: Header
  }
  deriving (Int -> CsvConfig -> ShowS
[CsvConfig] -> ShowS
CsvConfig -> String
(Int -> CsvConfig -> ShowS)
-> (CsvConfig -> String)
-> ([CsvConfig] -> ShowS)
-> Show CsvConfig
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CsvConfig] -> ShowS
$cshowList :: [CsvConfig] -> ShowS
show :: CsvConfig -> String
$cshow :: CsvConfig -> String
showsPrec :: Int -> CsvConfig -> ShowS
$cshowsPrec :: Int -> CsvConfig -> ShowS
Show, (forall x. CsvConfig -> Rep CsvConfig x)
-> (forall x. Rep CsvConfig x -> CsvConfig) -> Generic CsvConfig
forall x. Rep CsvConfig x -> CsvConfig
forall x. CsvConfig -> Rep CsvConfig x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep CsvConfig x -> CsvConfig
$cfrom :: forall x. CsvConfig -> Rep CsvConfig x
Generic, CsvConfig -> CsvConfig -> Bool
(CsvConfig -> CsvConfig -> Bool)
-> (CsvConfig -> CsvConfig -> Bool) -> Eq CsvConfig
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CsvConfig -> CsvConfig -> Bool
$c/= :: CsvConfig -> CsvConfig -> Bool
== :: CsvConfig -> CsvConfig -> Bool
$c== :: CsvConfig -> CsvConfig -> Bool
Eq)

-- | default csv file details
--
-- >>> defaultCsvConfig
-- CsvConfig {name = "time_series_covid19_deaths_global_narrow", suffix = ".csv", dir = "./other", fsep = ',', header = HasHXL}
--
-- test data from https://data.humdata.org/dataset/novel-coronavirus-2019-ncov-cases
defaultCsvConfig :: CsvConfig
defaultCsvConfig :: CsvConfig
defaultCsvConfig =
  Text -> Text -> Text -> Char -> Header -> CsvConfig
CsvConfig
    Text
"time_series_covid19_deaths_global_narrow"
    Text
".csv"
    Text
"./other"
    Char
','
    Header
HasHXL

-- | filepath for the config.
--
-- >>> file defaultCsvConfig
-- "./other/time_series_covid19_deaths_global_narrow.csv"
file :: CsvConfig -> FilePath
file :: CsvConfig -> String
file CsvConfig
cfg =
  CsvConfig
cfg CsvConfig -> Getting Text CsvConfig Text -> Text
forall s a. s -> Getting a s a -> a
^. IsLabel "dir" (Getting Text CsvConfig Text)
Getting Text CsvConfig Text
#dir
    Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"/"
    Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> CsvConfig
cfg CsvConfig -> Getting Text CsvConfig Text -> Text
forall s a. s -> Getting a s a -> a
^. IsLabel "name" (Getting Text CsvConfig Text)
Getting Text CsvConfig Text
#name
    Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> CsvConfig
cfg CsvConfig -> Getting Text CsvConfig Text -> Text
forall s a. s -> Getting a s a -> a
^. IsLabel "suffix" (Getting Text CsvConfig Text)
Getting Text CsvConfig Text
#suffix
      Text -> (Text -> String) -> String
forall a b. a -> (a -> b) -> b
& Text -> String
unpack

-- | Type of header rows.  Note the modern propensity for multiple header rows.
data Header = HasHeader | HasHXL | NoHeader deriving (Int -> Header -> ShowS
[Header] -> ShowS
Header -> String
(Int -> Header -> ShowS)
-> (Header -> String) -> ([Header] -> ShowS) -> Show Header
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Header] -> ShowS
$cshowList :: [Header] -> ShowS
show :: Header -> String
$cshow :: Header -> String
showsPrec :: Int -> Header -> ShowS
$cshowsPrec :: Int -> Header -> ShowS
Show, Header -> Header -> Bool
(Header -> Header -> Bool)
-> (Header -> Header -> Bool) -> Eq Header
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Header -> Header -> Bool
$c/= :: Header -> Header -> Bool
== :: Header -> Header -> Bool
$c== :: Header -> Header -> Bool
Eq)

-- | A continuation emitter of parsed csv rows from a CsvConfig, returning the original text on failure
-- >>> rowEmitter defaultCsvConfig fields `with` emit
-- Just (Right ["Province/State","Country/Region","Lat","Long","Date","Value","ISO 3166-1 Alpha 3-Codes","Region Code","Sub-region Code","Intermediate Region Code\r"])
rowEmitter :: CsvConfig -> (Char -> A.Parser a) -> Cont IO (Emitter IO (Either Text a))
rowEmitter :: CsvConfig
-> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a))
rowEmitter CsvConfig
cfg Char -> Parser a
p = Parser a -> Emitter IO Text -> Emitter IO (Either Text a)
forall (m :: * -> *) a.
Functor m =>
Parser a -> Emitter m Text -> Emitter m (Either Text a)
parseE (Char -> Parser a
p (Getting Char CsvConfig Char -> CsvConfig -> Char
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view IsLabel "fsep" (Getting Char CsvConfig Char)
Getting Char CsvConfig Char
#fsep CsvConfig
cfg)) (Emitter IO Text -> Emitter IO (Either Text a))
-> Cont IO (Emitter IO Text)
-> Cont IO (Emitter IO (Either Text a))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Cont IO (Emitter IO Text)
fileE (CsvConfig -> String
file CsvConfig
cfg)

-- | commits printed csv rows
--
-- >>> let testConfig = CsvConfig "test" ".csv" "./test" ',' NoHeader
-- >>> let ctest = rowCommitter testConfig (fmap (Text.intercalate "," . fmap (Text.pack . show)))
--
-- FIXME: fails if used outside this project.
-- > ctest `with` (\c -> commit c [[1..10::Int]])
-- True
--
-- > rowEmitter testConfig ints `with` emit
-- Just (Right [1,2,3,4,5,6,7,8,9,10])
rowCommitter :: CsvConfig -> (a -> [Text]) -> Cont IO (Committer IO a)
rowCommitter :: CsvConfig -> (a -> [Text]) -> Cont IO (Committer IO a)
rowCommitter CsvConfig
cfg a -> [Text]
f = (a -> Text) -> Committer IO Text -> Committer IO a
forall (f :: * -> *) a b. Contravariant f => (a -> b) -> f b -> f a
contramap (Text -> [Text] -> Text
Text.intercalate (Char -> Text
Text.singleton (Char -> Text) -> Char -> Text
forall a b. (a -> b) -> a -> b
$ Getting Char CsvConfig Char -> CsvConfig -> Char
forall s (m :: * -> *) a. MonadReader s m => Getting a s a -> m a
view IsLabel "fsep" (Getting Char CsvConfig Char)
Getting Char CsvConfig Char
#fsep CsvConfig
cfg) ([Text] -> Text) -> (a -> [Text]) -> a -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. a -> [Text]
f) (Committer IO Text -> Committer IO a)
-> Cont IO (Committer IO Text) -> Cont IO (Committer IO a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Cont IO (Committer IO Text)
fileWriteC (CsvConfig -> String
file CsvConfig
cfg)

-- | Run a parser across all lines of a file.
--
-- >>> r1 <- runCsv defaultCsvConfig fields
-- >>> length r1
-- 42562
--
-- >>> length [x | (Left x) <- r1]
-- 0
--
-- >>> take 2 $ drop 2 [x | (Right x) <- r1]
-- [["","Afghanistan","33.0","65.0","2020-06-29","733","AFG","142","34","\r"],["","Afghanistan","33.0","65.0","2020-06-28","721","AFG","142","34","\r"]]
runCsv :: CsvConfig -> (Char -> A.Parser a) -> IO [Either Text a]
runCsv :: CsvConfig -> (Char -> Parser a) -> IO [Either Text a]
runCsv CsvConfig
cfg Char -> Parser a
p = Cont IO (Emitter IO (Either Text a))
-> (Emitter IO (Either Text a) -> IO [Either Text a])
-> IO [Either Text a]
forall (m :: * -> *) a. Cont m a -> forall r. (a -> m r) -> m r
with (CsvConfig
-> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a))
forall a.
CsvConfig
-> (Char -> Parser a) -> Cont IO (Emitter IO (Either Text a))
rowEmitter CsvConfig
cfg Char -> Parser a
p) Emitter IO (Either Text a) -> IO [Either Text a]
forall (m :: * -> *) a. Monad m => Emitter m a -> m [a]
toListE

-- * low-level generic csv parser helpers

-- | Most parsing and building routines implicity assume a character acting as a separator of fields, and newlines separating rows.
--
-- >>> A.parse (sep ',') ",ok"
-- Done "ok" ()
sep :: Char -> A.Parser ()
sep :: Char -> Parser ()
sep Char
c = Parser Text Char -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser Text Char
A.char Char
c)

-- * single field parsers

-- | an unquoted field
-- Does not consume the separator token
--
-- >>> A.parse (field_ ',') "field,ok"
-- Done ",ok" "field"
field_ :: Char -> A.Parser Text
field_ :: Char -> Parser Text
field_ Char
c = (Char -> Bool) -> Parser Text
A.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c)

-- | an unquoted field
-- Consume the separator token
--
-- >>> A.parse (field ',') "field,ok"
-- Done "ok" "field"
field :: Char -> A.Parser Text
field :: Char -> Parser Text
field Char
c = (Char -> Bool) -> Parser Text
A.takeWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c) Parser Text -> Parser Text Char -> Parser Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
c

-- | skipping a field
--
-- >>> A.parse (skipField_ ',') "field,ok"
-- Done ",ok" ()
skipField_ :: Char -> A.Parser ()
skipField_ :: Char -> Parser ()
skipField_ Char
c = (Char -> Bool) -> Parser ()
A.skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c)

-- | skipping a field
--
-- >>> A.parse (skipField ',') "field,ok"
-- Done "ok" ()
skipField :: Char -> A.Parser ()
skipField :: Char -> Parser ()
skipField Char
c = (Char -> Bool) -> Parser ()
A.skipWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
c) Parser () -> Parser Text Char -> Parser ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
','

-- | int parser
--
-- >>> A.parse int "234,ok"
-- Done ",ok" 234
int :: A.Parser Int
int :: Parser Int
int = Parser Int
forall a. Integral a => Parser a
A.decimal

-- | int parser, consumes separator
--
-- >>> A.parse (int' ',') "234,ok"
-- Done "ok" 234
int' :: Char -> A.Parser Int
int' :: Char -> Parser Int
int' Char
c = Parser Int
forall a. Integral a => Parser a
A.decimal Parser Int -> Parser Text Char -> Parser Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
c

-- | double parser, consumes separator
--
-- >>> A.parse (double' ',') "234.000,ok"
-- Done "ok" 234.0
double' :: Char -> A.Parser Double
double' :: Char -> Parser Double
double' Char
c = Parser Double
A.double Parser Double -> Parser Text Char -> Parser Double
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Text Char
A.char Char
c

-- | Day parser, consumes separator
--
-- >>> A.parse (day' ',') "2020-07-01,ok"
-- Done "ok" 2020-07-01
day' :: Char -> A.Parser Day
day' :: Char -> Parser Day
day' Char
c = do
  Text
d <- (Char -> Bool) -> Parser Text
A.takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)
  Day
d' <- Bool -> TimeLocale -> String -> String -> Parser Day
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale String
"%F" (Text -> String
unpack Text
d)
  Char
_ <- Char -> Parser Text Char
A.char Char
c
  Day -> Parser Day
forall (f :: * -> *) a. Applicative f => a -> f a
pure Day
d'

-- | TimeOfDay parser, consumes separator
--
-- >>> A.parse (tod' ',') "23:52:05.221109,ok"
-- Done "ok" 23:52:05.221109
tod' :: Char -> A.Parser TimeOfDay
tod' :: Char -> Parser TimeOfDay
tod' Char
c = do
  Text
d <- (Char -> Bool) -> Parser Text
A.takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)
  TimeOfDay
d' <- Bool -> TimeLocale -> String -> String -> Parser TimeOfDay
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale String
"%T%Q" (Text -> String
unpack Text
d)
  Char
_ <- Char -> Parser Text Char
A.char Char
c
  TimeOfDay -> Parser TimeOfDay
forall (f :: * -> *) a. Applicative f => a -> f a
pure TimeOfDay
d'

-- | TimeOfDay parser, consumes separator
--
-- >>> A.parse (localtime' ',') "Jun 24 8:24AM,ok"
-- Done "ok" 2020-06-24 08:24:00
localtime' :: Char -> A.Parser LocalTime
localtime' :: Char -> Parser LocalTime
localtime' Char
c = do
  Text
d <- (Char -> Bool) -> Parser Text
A.takeTill (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
c)
  LocalTime
d' <- Bool -> TimeLocale -> String -> String -> Parser LocalTime
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
False TimeLocale
defaultTimeLocale String
"%Y %b %e %k:%M%p" (Text -> String
unpack (Text
"2020 " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
d))
  Char
_ <- Char -> Parser Text Char
A.char Char
c
  LocalTime -> Parser LocalTime
forall (f :: * -> *) a. Applicative f => a -> f a
pure LocalTime
d'

-- * Block list parsers

-- | Parser for a csv row of [Text].
-- TODO: deal with potential for an extra '\r'
--
-- >>> A.parseOnly (fields ',') "field1,field2\r"
-- Right ["field1","field2\r"]
fields :: Char -> A.Parser [Text]
fields :: Char -> Parser [Text]
fields Char
c =
  Char -> Parser Text
field_ Char
c Parser Text -> Parser () -> Parser [Text]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser ()
sep Char
c

-- | parser for a csv row of [Scientific]
--
-- >>> A.parseOnly (scis ',') "1,2.2,3.3"
-- Right [1.0,2.2,3.3]
scis :: Char -> A.Parser [Scientific]
scis :: Char -> Parser [Scientific]
scis Char
c = Parser Scientific
A.scientific Parser Scientific -> Parser () -> Parser [Scientific]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser ()
sep Char
c

-- | parser for a csv row of [Double]
--
-- >>> A.parseOnly (doubles ',') "1,2,3"
-- Right [1.0,2.0,3.0]
doubles :: Char -> A.Parser [Double]
doubles :: Char -> Parser [Double]
doubles Char
c = Parser Double
A.double Parser Double -> Parser () -> Parser [Double]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser ()
sep Char
c

-- | parser for a csv row of [Int]
--
-- >>> A.parseOnly (ints ',') "1,2,3"
-- Right [1,2,3]
ints :: Char -> A.Parser [Int]
ints :: Char -> Parser [Int]
ints Char
c = Parser Int -> Parser Int
forall a. Num a => Parser a -> Parser a
A.signed Parser Int
forall a. Integral a => Parser a
A.decimal Parser Int -> Parser () -> Parser [Int]
forall (f :: * -> *) a s. Alternative f => f a -> f s -> f [a]
`A.sepBy1` Char -> Parser ()
sep Char
c