{-# LANGUAGE DefaultSignatures #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FunctionalDependencies #-} {-# LANGUAGE FlexibleInstances#-} {-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveTraversable #-} {-# LANGUAGE DeriveGeneric #-} {-| Module : Data.Sv.Syntax.Sv Copyright : (C) CSIRO 2017-2018 License : BSD3 Maintainer : George Wilson Stability : experimental Portability : non-portable This file defines a datatype for a complete Sv document. The datatype preserves information such as whitespace so that the original text can be recovered. In the usual workflow, this type is only an intermediate stage between parsing and decoding. You can program against it directly using the provided functions and optics if you'd like. For an example of this see -} module Data.Sv.Syntax.Sv ( Sv (Sv, _separatorSv, _maybeHeader, _records, _finalNewlines) , HasSv (sv, maybeHeader, traverseHeader, finalNewlines) , HasRecords (records, traverseNewlines, traverseRecords) , mkSv , emptySv , recordList , Header (Header, _headerRecord) , HasHeader (header, headerRecord, headerNewline) , noHeader , mkHeader , Headedness (Unheaded, Headed) , HasHeadedness (headedness) , getHeadedness , Separator , HasSeparator (separator) , comma , pipe , tab ) where import Control.DeepSeq (NFData) import Control.Lens (Lens, Lens', Traversal') import Data.Foldable (Foldable (foldMap)) import Data.Functor (Functor (fmap), (<$>)) import Data.Monoid ((<>)) import Data.Traversable (Traversable (traverse)) import GHC.Generics (Generic) import Data.Sv.Syntax.Field (HasFields (fields)) import Data.Sv.Syntax.Record (Record, Records (EmptyRecords), HasRecord (record), HasRecords (records, traverseNewlines, traverseRecords), recordList) import Text.Newline (Newline) -- | 'Sv' is a whitespace-preserving data type for separated values. -- Often the separator is a comma, but this type does not make that -- assumption so that it can be used for pipe- or tab-separated values -- as well. data Sv s = Sv { _separatorSv :: Separator , _maybeHeader :: Maybe (Header s) , _records :: Records s , _finalNewlines :: [Newline] } deriving (Eq, Ord, Show, Generic) instance NFData s => NFData (Sv s) -- | Classy lenses for 'Sv' class (HasRecords c s, HasSeparator c) => HasSv c s | c -> s where sv :: Lens' c (Sv s) maybeHeader :: Lens' c (Maybe (Header s)) {-# INLINE maybeHeader #-} traverseHeader :: Traversal' c (Header s) {-# INLINE traverseHeader #-} finalNewlines :: Lens' c [Newline] {-# INLINE finalNewlines #-} maybeHeader = sv . maybeHeader traverseHeader = maybeHeader . traverse finalNewlines = sv . finalNewlines instance HasRecords (Sv s) s where records f (Sv x1 x2 x3 x4) = fmap (\y -> Sv x1 x2 y x4) (f x3) {-# INLINE records #-} instance HasSv (Sv s) s where sv = id {-# INLINE sv #-} maybeHeader f (Sv x1 x2 x3 x4) = fmap (\y -> Sv x1 y x3 x4) (f x2) {-# INLINE maybeHeader #-} finalNewlines f (Sv x1 x2 x3 x4) = fmap (Sv x1 x2 x3) (f x4) {-# INLINE finalNewlines #-} -- | Convenience constructor for Sv mkSv :: Separator -> Maybe (Header s) -> [Newline] -> Records s -> Sv s mkSv c h ns rs = Sv c h rs ns -- | An empty Sv emptySv :: Separator -> Sv s emptySv c = Sv c Nothing EmptyRecords [] instance Functor Sv where fmap f (Sv s h rs e) = Sv s (fmap (fmap f) h) (fmap f rs) e instance Foldable Sv where foldMap f (Sv _ h rs _) = foldMap (foldMap f) h <> foldMap f rs instance Traversable Sv where traverse f (Sv s h rs e) = Sv s <$> traverse (traverse f) h <*> traverse f rs <*> pure e -- | Determine the 'Headedness' of an 'Sv' getHeadedness :: Sv s -> Headedness getHeadedness = maybe Unheaded (const Headed) . _maybeHeader -- | A 'Header' is present in many CSV documents, usually listing the names -- of the columns. We keep this separate from the regular records. data Header s = Header { _headerRecord :: Record s , _headerNewline :: Newline } deriving (Eq, Ord, Show, Functor, Foldable, Traversable, Generic) instance NFData s => NFData (Header s) -- | Classy lenses for 'Header' class HasHeader s t a b | s -> a, t -> b, s b -> t, t a -> s where header :: Lens s t (Header a) (Header b) headerNewline :: (s ~ t) => Lens s t Newline Newline {-# INLINE headerNewline #-} headerRecord :: Lens s t (Record a) (Record b) {-# INLINE headerRecord #-} default headerNewline :: (a ~ b) => Lens s t Newline Newline headerNewline = header . headerNewline headerRecord = header . headerRecord instance HasHeader (Header a) (Header b) a b where header = id {-# INLINE header #-} headerNewline f (Header x1 x2) = fmap (Header x1) (f x2) {-# INLINE headerNewline #-} headerRecord f (Header x1 x2) = fmap (\y -> Header y x2) (f x1) {-# INLINE headerRecord #-} instance HasRecord (Header a) (Header b) a b where record = headerRecord {-# INLINE record #-} instance HasFields (Header a) (Header b) a b where fields = headerRecord . fields -- | Used to build 'Sv's that don't have a header noHeader :: Maybe (Header s) noHeader = Nothing -- | Convenience constructor for 'Header', usually when you're building 'Sv's mkHeader :: Record s -> Newline -> Maybe (Header s) mkHeader r n = Just (Header r n) -- | Does the 'Sv' have a 'Header' or not? A header is a row at the beginning -- of a file which contains the string names of each of the columns. -- -- If a header is present, it must not be decoded with the rest of the data. data Headedness = Unheaded | Headed deriving (Eq, Ord, Show) -- | Classy lens for 'Headedness' class HasHeadedness c where headedness :: Lens' c Headedness instance HasHeadedness Headedness where headedness = id -- | By what are your values separated? The answer is often 'comma', but not always. -- -- A 'Separator' is just a 'Char'. It could be a sum type instead, since it -- will usually be comma or pipe, but our preference has been to be open here -- so that you can use whatever you'd like. There are test cases, for example, -- ensuring that you're free to use null-byte separated values if you so desire. type Separator = Char -- | Classy lens for 'Separator' class HasSeparator c where separator :: Lens' c Separator instance HasSeparator Char where separator = id {-# INLINE separator #-} instance HasSeparator (Sv s) where separator f (Sv x1 x2 x3 x4) = fmap (\y -> Sv y x2 x3 x4) (f x1) {-# INLINE separator #-} -- | The venerable comma separator. Used for CSV documents. comma :: Separator comma = ',' -- | The pipe separator. Used for PSV documents. pipe :: Separator pipe = '|' -- | Tab is a separator too - why not? tab :: Separator tab = '\t'