{-|
Module      : Data.Csv.Lens
Description : A lensy layer on top of Cassava which affords streaming, traversable, CSV parsing.
Copyright   : (c) Chris Penner, 2019
License     : BSD3

The examples below use the following csv as the value @myCsv@:

> state_code,population
> NY,19540000
> CA,39560000

-}

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Data.Csv.Lens
    ( namedCsv
    , csv
    , headers
    , rows
    , row
    , columns
    , columns'
    , column
    , column'
    , _Record
    , _Record'
    , _NamedRecord
    , _NamedRecord'
    , _Field
    , _Field'
    , Csv'
    , CsvRecord
    , cassavaNamed
    , cassavaUnnamed
    , adjustingOutputHeaders
    ) where

import Control.Lens
import qualified Data.ByteString.Lazy as BL hiding (putStrLn)
import Data.Csv hiding (index)
import qualified Data.Csv.Streaming as S
import Data.Foldable
import Data.Either
import GHC.TypeLits
import Data.Kind
import Data.Type.Equality

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> :set -XTypeApplications
-- >>> :set -XDataKinds
-- >>> import qualified Data.Map as M
-- >>> import qualified Data.ByteString.Lazy.Char8 as BL
-- >>> myCsv <- BL.readFile "./data/simple.csv"


-- | A CSV Record which carries a type-level witness of whether the record is named or not.
--
-- A csv record with named columns has type @'CsvRecord' 'Name'@, where 'Name' is cassava's
-- alias for a strict @ByteString@ (not the lazy 'BL.ByteString' used for whole documents).
--
-- A csv record with numbered columns has type @'CsvRecord' 'Int'@
data CsvRecord i where
  -- | Wraps a cassava 'NamedRecord'; fixes the index type to 'Name'.
  NamedCsvRecord :: NamedRecord -> CsvRecord Name
  -- | Wraps a cassava 'Record'; fixes the index type to 'Int'.
  CsvRecord :: Record -> CsvRecord Int

-- Renders a record as its constructor name followed by the parenthesised
-- 'show' of the wrapped cassava record.
--
-- The whole rendering is wrapped in an extra pair of parentheses whenever the
-- surrounding precedence is above function application (10), which is the
-- standard 'showsPrec' convention; 'showParen' takes care of that instead of
-- special-casing precedence 11 only.
instance Show (CsvRecord i) where
  showsPrec d record = showParen (d > 10) (showString (render record))
    where
      -- The inner parentheses around the payload are part of the established
      -- output format and are always emitted.
      render :: CsvRecord j -> String
      render (CsvRecord r) = "CsvRecord (" <> show r <> ")"
      render (NamedCsvRecord r) = "NamedCsvRecord (" <> show r <> ")"

type instance Index (CsvRecord i) = i
type instance IxValue (CsvRecord i) = Field

-- | A 'CsvRecord' is indexable using 'ix': by 'Int' for records with numbered columns,
-- or by 'Name' for records with named columns. The focus is the raw 'Field'.
instance Ixed (CsvRecord i) where
  -- Matching on the constructor reveals the concrete index type, after which
  -- the lookup is delegated to the 'Ixed' instance of the wrapped record.
  ix key handler record = case record of
    NamedCsvRecord fields -> NamedCsvRecord <$> ix key handler fields
    CsvRecord fields -> CsvRecord <$> ix key handler fields

-- | 'Csv'' is a wrapper around cassava's csv type which carries the appropriate indexing
-- and column header information.
--
-- The type parameter is the index type of the contained records: 'Name' for a csv with a
-- header row, 'Int' for a csv without one.
data Csv' i where
  -- | A csv with a header: the 'Header' plus the streamed 'NamedRecord's.
  NamedCsv :: Header -> S.Records NamedRecord -> Csv' Name
  -- | A csv without a header: just the streamed positional 'Record's.
  UnnamedCsv :: S.Records Record -> Csv' Int

type instance Index (Csv' i) = Int
type instance IxValue (Csv' i) = CsvRecord i

-- | A 'Csv'' is indexable using 'ix' by row number (an 'Int'), regardless of whether its
-- records are named or numbered. The focus is the 'CsvRecord' at that row.
instance Ixed (Csv' i) where
  -- Implemented by narrowing the 'rows' traversal to the row whose index
  -- equals the requested row number.
  ix rowNumber = rows . index rowNumber

-- Encoding a 'CsvRecord' simply hands back the cassava record it wraps.
instance ToNamedRecord (CsvRecord Name) where
  toNamedRecord = unpackRecordWithName

instance ToRecord (CsvRecord Int) where
  toRecord = unpackRecordWithIndex

-- Decoding into a 'CsvRecord' never fails: the cassava record is wrapped as-is
-- and no field is inspected.
instance FromNamedRecord (CsvRecord Name) where
  parseNamedRecord = pure . NamedCsvRecord

instance FromRecord (CsvRecord Int) where
  parseRecord = pure . CsvRecord


-- | An iso between the results of 'S.decode' or 'S.decodeWith' and a @'Csv'' 'Int'@ for use with this library.
--
-- You should typically just use 'csv', but this can be helpful if you want to
-- provide custom decoding options. Use 'from' to turn cassava's records into a 'Csv''.
--
-- >>> S.decode HasHeader myCsv ^.. from cassavaUnnamed  . rows . column @String 0
-- ["NY","CA"]
cassavaUnnamed :: Iso' (Csv' Int) (S.Records Record)
cassavaUnnamed = iso unwrap UnnamedCsv
  where
    -- 'UnnamedCsv' is the only constructor that can produce a @'Csv'' 'Int'@.
    unwrap :: Csv' Int -> S.Records Record
    unwrap (UnnamedCsv records) = records

-- | An iso between the (successful) results of 'S.decodeByName' or 'S.decodeByNameWith' and a
-- @'Csv'' 'Name'@ for use with this library.
--
-- You should typically just use 'namedCsv', but this can be helpful if you want to
-- provide custom decoding options. Use 'from' to turn cassava's header and records
-- into a 'Csv''.
--
-- >>> S.decodeByName  myCsv ^.. _Right . from cassavaNamed . rows . column @String "state_code"
-- ["NY","CA"]
cassavaNamed :: Iso' (Csv' Name) (Header, S.Records NamedRecord)
cassavaNamed = iso unwrap (uncurry NamedCsv)
  where
    -- 'NamedCsv' is the only constructor that can produce a @'Csv'' 'Name'@.
    unwrap :: Csv' Name -> (Header, S.Records NamedRecord)
    unwrap (NamedCsv hdr records) = (hdr, records)

-- | A prism which attempts to parse a 'BL.ByteString' into a structured @'Csv'' 'Name'@.
--
-- The first row of the csv is used as the headers.
--
-- The prism fails to match, without reporting the error, whenever 'S.decodeByName'
-- rejects the input. Going the other way, the csv is re-encoded with 'encodeByName'
-- using the header stored in the 'Csv''.
--
-- Follow up with 'rows', 'row', or 'headers'
--
-- >>> :t  myCsv ^? namedCsv
-- myCsv ^? namedCsv :: Maybe (Csv' Name)
namedCsv :: Prism' BL.ByteString (Csv' Name)
namedCsv = prism' toBytes fromBytes
  where
    toBytes :: Csv' Name -> BL.ByteString
    toBytes (NamedCsv hdr records) = encodeByName hdr (toList records)
    fromBytes :: BL.ByteString -> Maybe (Csv' Name)
    fromBytes input = case S.decodeByName input of
      Left _ -> Nothing
      Right parsed -> Just (uncurry NamedCsv parsed)

-- | An iso which parses a 'BL.ByteString' into a structured @'Csv'' 'Int'@ and encodes it back.
--
-- Use this with CSVs which don't have a header row: decoding uses 'NoHeader', so the
-- first line is treated as an ordinary row (use @'dropping' 1 'rows'@ to skip it).
--
-- Unlike 'namedCsv' this is an 'Iso'', not a 'Prism'', so it always matches; previewing
-- with '^?' never yields 'Nothing'.
-- NOTE(review): rows that fail to decode are presumably skipped by 'toList' when encoding
-- back to bytes, so a malformed CSV may not round-trip -- confirm against cassava's
-- 'S.Records' instances.
--
-- Follow up with 'rows' or 'row'
--
-- >>> :t  myCsv ^? csv
-- myCsv ^? csv :: Maybe (Csv' Int)
csv :: Iso' BL.ByteString (Csv' Int)
csv = iso fromBytes toBytes
  where
    fromBytes :: BL.ByteString -> Csv' Int
    fromBytes = UnnamedCsv . S.decode NoHeader
    toBytes :: Csv' Int -> BL.ByteString
    toBytes (UnnamedCsv records) = encode (toList records)

-- Strip the 'NamedCsvRecord' wrapper, exposing the underlying cassava 'NamedRecord'.
unpackRecordWithName :: CsvRecord Name -> NamedRecord
unpackRecordWithName wrapped = case wrapped of
  NamedCsvRecord named -> named

-- Strip the 'CsvRecord' wrapper, exposing the underlying cassava 'Record'.
unpackRecordWithIndex :: CsvRecord Int -> Record
unpackRecordWithIndex wrapped = case wrapped of
  CsvRecord positional -> positional

-- | An indexed traversal over the CSV headers of a named CSV. Indexed by the column number
-- starting at 0.
--
-- >>> myCsv ^.. namedCsv . headers
-- ["state_code","population"]
--
-- >>> myCsv ^@.. namedCsv . headers
-- [(0,"state_code"),(1,"population")]
--
-- Modifying a header through this traversal only replaces the stored 'Header'; the
-- records themselves are passed through untouched, so their field names are NOT renamed
-- to match.
headers :: IndexedTraversal' Int (Csv' Name) Name
-- The records are deliberately not re-keyed with the new headers: doing so would
-- require walking every record of the csv.
headers f (NamedCsv hdr records) = flip NamedCsv records <$> (hdr & traversed %%@~ indexed f)

-- | Allows rewriting/adding/removing headers on the CSV before serializing.
--
-- The given function is applied to the header only on the way back out (when the
-- modified 'Csv'' is written back through the iso); reading through it is the identity.
--
-- Note that rewriting a header name DOES NOT affect any of the records, it only affects the
-- choice and order of the columns in the output CSV. If you want to rename a column header
-- you must also rename the name of that field on all rows in the csv.
--
-- This is a limitation of cassava itself.
--
-- Examples:
--
-- Drop the first column:
--
-- >>> BL.lines (myCsv & namedCsv . adjustingOutputHeaders (view _tail) %~ id)
-- ["population\r","19540000\r","39560000\r"]
--
-- Add a new column with the state code in lower case
--
-- >>> import Data.Char (toLower)
-- >>> addStateLower m = M.insert "state_lower" (m ^. ix "state_code" . to (map toLower)) m
-- >>> :{
--  BL.lines (myCsv
--    & namedCsv
--    -- Add "state_lower" to output headers so it will be serialized
--    . adjustingOutputHeaders (<> pure "state_lower")
--    . rows
--    . _NamedRecord @(M.Map String String)
--    -- Add "state_lower" to each record
--    %~ addStateLower
--           )
-- :}
-- ["state_code,population,state_lower\r","NY,19540000,ny\r","CA,39560000,ca\r"]
--
-- Reverse column order
--
-- >>> BL.lines (myCsv & namedCsv . adjustingOutputHeaders (view reversed) %~ id)
-- ["population,state_code\r","19540000,NY\r","39560000,CA\r"]
--
adjustingOutputHeaders :: (Header -> Header) -- ^ Adjust headers for the serialization step
                    -> Iso' (Csv' Name) (Csv' Name)
adjustingOutputHeaders f = iso id rewriteHeader
  where
    -- Only the header is touched; the records are carried over unchanged.
    rewriteHeader :: Csv' Name -> Csv' Name
    rewriteHeader (NamedCsv h xs) = NamedCsv (f h) xs

-- | An indexed traversal over every row of the csv, each presented as a 'CsvRecord' and
-- indexed by its row number. The record type carries the same witness as the csv, so
-- rows of a 'Name'-indexed csv are 'Name'-indexed and likewise for 'Int'.
--
-- Traversing rows of a named csv results in named records:
--
-- >>> myCsv ^.. namedCsv . rows
-- [NamedCsvRecord (fromList [("population","19540000"),("state_code","NY")]),NamedCsvRecord (fromList [("population","39560000"),("state_code","CA")])]
--
-- Traversing rows of an indexed csv results in indexed records:
--
-- >>> myCsv ^.. csv . dropping 1 rows
-- [CsvRecord (["NY","19540000"]),CsvRecord (["CA","39560000"])]
rows :: IndexedTraversal' Int (Csv' i) (CsvRecord i)
-- Each raw cassava record is wrapped before being handed to the visitor and
-- unwrapped again afterwards, so the stored representation stays the same.
rows f (NamedCsv h xs) =
  NamedCsv h . fmap unpackRecordWithName
    <$> itraverseOf traversed (\n r -> indexed f n (NamedCsvRecord r)) xs
rows f (UnnamedCsv xs) =
  UnnamedCsv . fmap unpackRecordWithIndex
    <$> itraverseOf traversed (\n r -> indexed f n (CsvRecord r)) xs

-- | Parse and traverse the fields of a 'CsvRecord' into the inferred 'FromField' type.
-- Focuses are indexed by either the column headers or column number accordingly.
--
-- Be careful to provide appropriate type hints to 'columns' so that it knows which
-- type to parse each 'Field' into. Any fields which fail to parse are simply skipped; you
-- can use this strategically to select all fields of a given type within a record.
--
-- >>> myCsv ^.. namedCsv . row 0 . columns @String
-- ["19540000","NY"]
--
-- >>> myCsv ^.. namedCsv . row 0 . columns @Int
-- [19540000]
--
-- 'columns' is indexed: the index is the column header for named records and the column
-- number for numbered records.
--
-- >>> myCsv ^@.. namedCsv . row 0 . columns @String
-- [("population","19540000"),("state_code","NY")]
--
-- >>> myCsv ^@.. namedCsv . row 0 . columns @Int
-- [("population",19540000)]
--
-- Modified values are written back into the record via 'ToField':
--
-- >>> BL.lines (myCsv & namedCsv . rows . columns @Int %~ subtract 1)
-- ["state_code,population\r","NY,19539999\r","CA,39559999\r"]
columns :: forall a i. (ToField a, FromField a) => IndexedTraversal' i (CsvRecord i) a
columns = columns'

-- | The type-changing variant of 'columns': fields are parsed with 'FromField' into @a@
-- and written back with 'ToField' from @b@. Type inference is worse than with 'columns',
-- so prefer 'columns' when the focus does not need to change type.
--
-- See 'columns' for usage examples
columns' :: forall a b i. (FromField a, ToField b) => IndexedTraversal i (CsvRecord i) (CsvRecord i) a b
columns' = rawFields . _Field'
  where
    -- Visits every raw 'Field' of the record together with its column index
    -- (position or header, depending on the record flavour).
    rawFields :: IndexedTraversal' i (CsvRecord i) Field
    rawFields f record = case record of
      CsvRecord r -> CsvRecord <$> itraverse (indexed f) r
      NamedCsvRecord r -> NamedCsvRecord <$> itraverse (indexed f) r

-- | Select a specific column of a record by the appropriate index type, either 'Name' for 'namedCsv's or 'Int' for 'csv's.
--
-- The field is parsed into the requested type; if the column is missing or fails to
-- parse, nothing is focused. The focus is indexed by the column index that was asked for.
--
-- See 'columns' for more usage ideas.
--
-- >>> myCsv ^.. namedCsv . rows . column @Int "population"
-- [19540000,39560000]
--
-- >>> myCsv ^.. csv . dropping 1 rows . column @String 0
-- ["NY","CA"]
column :: forall a b i. (Eq i, FromField a, ToField a) => i -> IndexedTraversal' i (CsvRecord i) a
-- NOTE(review): the type variable @b@ is quantified but never used, and the @Eq i@
-- constraint does not appear to be needed by the implementation. Both are left alone
-- because dropping @b@ would shift the position of @i@ for visible type application.
column i = column' i

-- | The type-changing variant of 'column': the field is parsed with 'FromField' into @a@
-- and written back with 'ToField' from @b@. Type inference is worse than with 'column',
-- so prefer 'column' when the focus does not need to change type.
--
-- See 'column' for usage examples
column' :: forall a b i. (Eq i, FromField a, ToField b) => i -> IndexedTraversal i (CsvRecord i) (CsvRecord i) a b
column' key = rawField . _Field'
  where
    -- Focuses the raw 'Field' stored under @key@ (if any), reporting @key@
    -- itself as the index of the focus.
    rawField :: IndexedTraversal' i (CsvRecord i) Field
    rawField f record = ix key (indexed f key) record

-- | Traverse a specific row of the csv by row number (the first row is row 0).
--
-- Nothing is focused if the csv has no such row. The focus is indexed by the
-- requested row number.
row :: Int -> IndexedTraversal' Int (Csv' i) (CsvRecord i)
row rowNumber f table = ix rowNumber (indexed f rowNumber) table

-- | A prism which attempts to parse the given record into a type using 'FromRecord',
-- and encodes values back into a record using 'ToRecord'.
--
-- Tuples implement 'FromRecord':
--
-- >>> myCsv ^.. csv . row 1 . _Record @(String, Int)
-- [("NY",19540000)]
--
-- If we parse each row into a tuple record we can swap the positions and it will write back
-- into a valid CSV.
--
-- >>> import Data.Tuple (swap)
-- >>> BL.lines (myCsv & csv . rows . _Record @(String, String) %~ swap)
-- ["population,state_code\r","19540000,NY\r","39560000,CA\r"]
_Record :: forall a b. (FromRecord a, ToRecord a) => Prism' (CsvRecord Int) a
-- NOTE(review): the type variable @b@ is quantified but never used; it is left in place
-- so the public signature is unchanged.
_Record = _Record'

-- | The type-changing variant of '_Record': records are parsed with 'FromRecord' into @a@
-- and rebuilt with 'ToRecord' from @b@. Type inference is worse than with '_Record', so
-- prefer '_Record' when the focus does not need to change type.
--
-- See '_Record' for usage examples
_Record' :: forall a b. (FromRecord a, ToRecord b) => Prism (CsvRecord Int) (CsvRecord Int) a b
_Record' = prism build match
  where
    build :: b -> CsvRecord Int
    build value = CsvRecord (toRecord value)
    -- On a parse failure the error message is discarded and the original
    -- record is handed back untouched.
    match :: CsvRecord Int -> Either (CsvRecord Int) a
    match original@(CsvRecord fields) =
      either (const (Left original)) Right (runParser (parseRecord fields))

-- | A prism which attempts to parse the given record into a type using 'FromNamedRecord',
-- and encodes values back into a record using 'ToNamedRecord'.
--
-- >>> myCsv ^? namedCsv . row 0 . _NamedRecord @(M.Map String String)
-- Just (fromList [("population","19540000"),("state_code","NY")])
_NamedRecord :: forall a b. (FromNamedRecord a, ToNamedRecord a)
             => Prism' (CsvRecord Name) a
-- NOTE(review): the type variable @b@ is quantified but never used; it is left in place
-- so the public signature is unchanged.
_NamedRecord = _NamedRecord'

-- | The type-changing variant of '_NamedRecord': records are parsed with 'FromNamedRecord'
-- into @a@ and rebuilt with 'ToNamedRecord' from @b@. Type inference is worse than with
-- '_NamedRecord', so prefer '_NamedRecord' when the focus does not need to change type.
--
-- See '_NamedRecord' for usage examples
_NamedRecord' :: forall a b. (FromNamedRecord a, ToNamedRecord b)
              => Prism (CsvRecord Name) (CsvRecord Name) a b
_NamedRecord' = prism build match
  where
    build :: b -> CsvRecord Name
    build value = NamedCsvRecord (toNamedRecord value)
    -- On a parse failure the error message is discarded and the original
    -- record is handed back untouched.
    match :: CsvRecord Name -> Either (CsvRecord Name) a
    match original@(NamedCsvRecord fields) =
      either (const (Left original)) Right (runParser (parseNamedRecord fields))

-- | A prism which attempts to parse the given 'Field' into a type using 'FromField', and
-- encodes values back into a 'Field' using 'ToField'.
--
-- You usually won't need this: 'column', 'columns', '_Record', and '_NamedRecord' are usually more flexible and provide more power.
_Field :: forall a. (FromField a, ToField a) => Prism' Field a
_Field = _Field'

-- | The type-changing variant of '_Field': the field is parsed with 'FromField' into @a@
-- and rebuilt with 'ToField' from @b@. Type inference is worse than with '_Field', so
-- prefer '_Field' when the focus does not need to change type.
--
-- You usually won't need this: 'column', 'columns', '_Record', and '_NamedRecord' are usually more flexible and provide more power.
_Field' :: forall a b. (FromField a, ToField b) => Prism Field Field a b
_Field' = prism toField parse
  where
    -- On a parse failure the error message is discarded and the original
    -- field is handed back untouched.
    parse :: Field -> Either Field a
    parse raw = case runParser (parseField raw) of
      Left _ -> Left raw
      Right value -> Right value