{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} {-| Module : Data.Sv.Encode.Core Copyright : (C) CSIRO 2017-2019 License : BSD3 Maintainer : George Wilson Stability : experimental Portability : non-portable This module is intended to be imported qualified as follows @import Data.Sv.Encode.Core as E@ To produce a CSV file from data types, build an 'Encode' for your data type. This module contains primitives, combinators, and type class instances to help you to do so. 'Encode' is a 'Contravariant' functor, as well as a 'Divisible' and 'Decidable'. 'Divisible' is the contravariant form of 'Applicative', while 'Decidable' is the contravariant form of 'Control.Applicative.Alternative'. These type classes will provide useful combinators for working with 'Encode's. Specialised to 'Encode', the function 'Data.Functor.Contravariant.Divisible.divide' from 'Divisible' has the type: @ divide :: (a -> (b,c)) -> Encode b -> Encode c -> Encode a @ which can be read "if 'a' can be split into 'b' and 'c', and I can handle 'b', and I can handle 'c', then I can handle 'a'". Here the "I can handle" part corresponds to the 'Encode'. If we think of (covariant) functors as being "full of" 'a', then we can think of contravariant functors as being "able to handle" 'a'. How does it work? Perform the split on the 'a', handle the 'b' by converting it into some text, handle the 'c' by also converting it to some text, then put each of those text fragments into their own field in the CSV. Similarly, the function 'Data.Functor.Contravariant.Divisible.choose' from 'Decidable', specialsed to 'Encode', has the type: @ choose :: (a -> Either b c) -> Encode b -> Encode c -> Encode a @ which can be read "if 'a' is either 'b' or 'c', and I can handle 'b', and I can handle 'c', then I can handle 'a'". This works by performing the split, then checking whether 'b' or 'c' resulted, then using the appropriate 'Encode'. For an example of encoding, see -} module Data.Sv.Encode.Core ( Encode (..) -- * Convenience constructors , mkEncodeBS , mkEncodeWithOpts -- * Running an Encode , encode , encodeNamed , encodeToHandle , encodeNamedToHandle , encodeToFile , encodeNamedToFile , encodeBuilder , encodeNamedBuilder , encodeRow , encodeRowBuilder -- * Options , module Data.Sv.Encode.Options -- * Primitive encodes -- ** Name-based , named , (=:) -- ** Field-based , const , show , nop , empty , orEmpty , char , int , integer , float , double , doubleFast , boolTrueFalse , booltruefalse , boolyesno , boolYesNo , boolYN , bool10 , string , text , byteString , lazyByteString -- ** Row-based , row -- * Combinators , (?>) , (>) , (<), mconcat) import Data.Sequence (Seq) import qualified Data.Sequence as Seq import qualified Data.Text as T import qualified Data.Text.Encoding as T import GHC.Word (Word8) import System.IO (BufferMode (BlockBuffering), Handle, hClose, hSetBinaryMode, hSetBuffering, openFile, IOMode (WriteMode)) import Data.Sv.Alien.Containers (intersperseSeq) import Data.Sv.Encode.Options (EncodeOptions (EncodeOptions, _encodeSeparator, _newline, _terminalNewline, _quoting), HasEncodeOptions (), HasSeparator (separator), defaultEncodeOptions, Quoting (Always, AsNeeded, Never)) import Data.Sv.Encode.Type (Encode (Encode, getEncode), NameEncode (NameEncode, unNamedE)) import Data.Sv.Structure.Newline (newlineToBuilder) -- | Make an 'Encode' from a function that builds one 'Field'. mkEncodeBS :: (a -> LBS.ByteString) -> Encode a mkEncodeBS = unsafeBuilder . fmap BS.lazyByteString -- | Make an 'Encode' from a function that builds one 'Field'. mkEncodeWithOpts :: (EncodeOptions -> a -> BS.Builder) -> Encode a mkEncodeWithOpts = Encode . fmap (fmap pure) -- | Make an encode from any function that returns a ByteString 'Builder'. unsafeBuilder :: (a -> BS.Builder) -> Encode a unsafeBuilder b = Encode (\_ a -> pure (b a)) {-# INLINE unsafeBuilder #-} -- | Encode the given list using the given 'Encode', configured by the given -- 'EncodeOptions'. encode :: Encode a -> EncodeOptions -> [a] -> LBS.ByteString encode enc opts = BS.toLazyByteString . encodeBuilder enc opts -- | Encode the given list with a header using the given 'NameEncode', -- configured by the given 'EncodeOptions'. encodeNamed :: NameEncode a -> EncodeOptions -> [a] -> LBS.ByteString encodeNamed enc opts = BS.toLazyByteString . encodeNamedBuilder enc opts -- | Encode, writing the output to a file handle. encodeToHandle :: Encode a -> EncodeOptions -> [a] -> Handle -> IO () encodeToHandle enc opts as h = BS.hPutBuilder h (encodeBuilder enc opts as) -- | Encode with a header, writing the output to a file handle. encodeNamedToHandle :: NameEncode a -> EncodeOptions -> [a] -> Handle -> IO () encodeNamedToHandle enc opts as h = BS.hPutBuilder h (encodeNamedBuilder enc opts as) -- | Encode, writing to a file. This way is more efficient than encoding to -- a 'ByteString' and then writing to file. encodeToFile :: Encode a -> EncodeOptions -> [a] -> FilePath -> IO () encodeToFile = genericEncodeToFile encodeToHandle -- | Encode with a header, writing to a file. This way is more efficient -- than encoding to a 'ByteString' and then writing to file. encodeNamedToFile :: NameEncode a -> EncodeOptions -> [a] -> FilePath -> IO () encodeNamedToFile = genericEncodeToFile encodeNamedToHandle genericEncodeToFile :: (enc -> EncodeOptions -> [a] -> Handle -> IO ()) -> enc -> EncodeOptions -> [a] -> FilePath -> IO () genericEncodeToFile encHandle enc opts as fp = do h <- openFile fp WriteMode hSetBuffering h (BlockBuffering Nothing) hSetBinaryMode h True encHandle enc opts as h hClose h {-# INLINE genericEncodeToFile #-} -- | Encode to a ByteString 'Builder', which is useful if you are going -- to combine the output with other 'ByteString's. encodeBuilder :: Encode a -> EncodeOptions -> [a] -> BS.Builder encodeBuilder e opts as = let enc = encodeRowBuilder e opts nl = newlineToBuilder (_newline opts) terminal = if _terminalNewline opts then nl else mempty in case as of [] -> terminal (a:as') -> enc a <> mconcat [nl <> enc a' | a' <- as'] <> terminal -- | Encode with column names to a ByteString 'Builder', which is useful -- if you are going to combine the output with other 'ByteString's. encodeNamedBuilder :: NameEncode a -> EncodeOptions -> [a] -> BS.Builder encodeNamedBuilder ne opts as = case runNamed ne of (e, builders) -> let mkHeader = fold . addSeparators opts . addQuoting opts addQuoting = fmap . enquote nl = newlineToBuilder (_newline opts) header = mkHeader builders in header <> case as of [] -> if _terminalNewline opts then nl else mempty (_:_) -> nl <> encodeBuilder e opts as -- | Encode one row only encodeRow :: Encode a -> EncodeOptions -> a -> LBS.ByteString encodeRow e opts = BS.toLazyByteString . encodeRowBuilder e opts -- | Encode one row only, as a ByteString 'Builder' encodeRowBuilder :: Encode a -> EncodeOptions -> a -> BS.Builder encodeRowBuilder e opts = fold . addSeparators opts . getEncode e opts addSeparators :: HasSeparator s => s -> Seq BS.Builder -> Seq BS.Builder addSeparators opts = intersperseSeq (BS.word8 (view separator opts)) {-# INLINE addSeparators #-} -- | Encode this 'Data.ByteString.ByteString' every time, ignoring the input. const :: Strict.ByteString -> Encode a const b = contramap (pure b) byteString -- | Build an 'Encode' using a type's 'Show' instance. show :: Show a => Encode a show = contramap P.show string -- | Don't encode anything. nop :: Encode a nop = conquer -- | Encode anything as the empty string. empty :: Encode a empty = Encode (pure (pure (pure mempty))) -- | Lift an Encode to be able to hanlde 'Maybe', by using the empty string -- in the case of 'Nothing' orEmpty :: Encode a -> Encode (Maybe a) orEmpty = choose (maybe (Left ()) Right) empty -- | Build an 'Encode' for 'Maybe' given a 'Just' and a 'Nothing' encode. (?>) :: Encode a -> Encode () -> Encode (Maybe a) (?>) = flip () #-} -- | Build an 'Encode' for 'Maybe' given a 'Nothing' and a 'Just' encode. ( Encode a -> Encode (Maybe a) (>) :: Encode a -> Strict.ByteString -> Encode (Maybe a) (?>>) a s = a ?> const s {-# INLINE (?>>) #-} -- | Build an 'Encode' for 'Maybe' given a 'Data.ByteString.Strict.ByteString' -- for the 'Nothing' case and a 'Just' encode. (< Encode a -> Encode (Maybe a) (<>) {-# INLINE (< Encode [s] row enc = Encode $ \opts list -> join $ Seq.fromList $ fmap (getEncode enc opts) list -- | Encode a single 'Char' char :: Encode Char char = escaped BS.charUtf8 quotingIsNecessary :: EncodeOptions -> LBS.ByteString -> Bool quotingIsNecessary opts bs = LBS.any p bs where sep = _encodeSeparator opts p :: Word8 -> Bool p w = w == sep || w == 10 || -- lf w == 13 || -- cr w == 34 -- double quote enquote :: EncodeOptions -> BS.Builder -> BS.Builder enquote opts s = let lbs = BS.toLazyByteString s quoted = quote lbs in case _quoting opts of Never -> s AsNeeded -> if quotingIsNecessary opts lbs then quoted else s Always -> quoted quote :: LBS.ByteString -> BS.Builder quote bs = let q = BS.charUtf8 '"' bs' = BS.lazyByteString (escapeQuotes bs) in q <> bs' <> q escapeQuotes :: LBS.ByteString -> LBS.ByteString escapeQuotes = LBS.concatMap duplicateQuote where duplicateQuote :: Word8 -> LBS.ByteString duplicateQuote 34 = LBS.pack [34,34] -- 34 = quote duplicateQuote c = LBS.singleton c -- | Encode an 'Int' int :: Encode Int int = unsafeBuilder BS.intDec -- | Encode an 'Integer' integer :: Encode Integer integer = unsafeBuilder BS.integerDec -- | Encode a 'Float' float :: Encode Float float = unsafeBuilder BS.floatDec -- | Encode a 'Double' -- -- This version satisfies the roundtrip property. If that doesn't matter to you, -- use the faster version 'doubleFast' double :: Encode Double double = unsafeBuilder BS.doubleDec -- | Encode a 'Double' really quickly. This version uses the @double-conversion@ -- package. doubleFast :: Encode Double doubleFast = contramap DC.toShortest unsafeByteString -- | Encode a 'String' string :: Encode String string = escaped BS.stringUtf8 -- | Encode a 'Data.Text.Text' text :: Encode T.Text text = escaped (BS.byteString . T.encodeUtf8) -- | Encode a strict 'Data.ByteString.ByteString' byteString :: Encode Strict.ByteString byteString = escaped BS.byteString -- | Encode a lazy 'Data.ByteString.Lazy.ByteString' lazyByteString :: Encode LBS.ByteString lazyByteString = escaped BS.lazyByteString escaped :: (s -> BS.Builder) -> Encode s escaped build = mkEncodeWithOpts $ \opts s -> enquote opts (build s) -- | Encode a 'Bool' as True or False boolTrueFalse :: Encode Bool boolTrueFalse = mkEncodeBS $ B.bool "False" "True" -- | Encode a 'Bool' as true of false booltruefalse :: Encode Bool booltruefalse = mkEncodeBS $ B.bool "false" "true" -- | Encode a 'Bool' as yes or no boolyesno :: Encode Bool boolyesno = mkEncodeBS $ B.bool "no" "yes" -- | Encode a 'Bool' as Yes or No boolYesNo :: Encode Bool boolYesNo = mkEncodeBS $ B.bool "No" "Yes" -- | Encode a 'Bool' as Y or N boolYN :: Encode Bool boolYN = mkEncodeBS $ B.bool "N" "Y" -- | Encode a 'Bool' as 1 or 0 bool10 :: Encode Bool bool10 = mkEncodeBS $ B.bool "0" "1" mkNamed :: Encode a -> Seq BS.Builder -> NameEncode a mkNamed enc b = NameEncode (ComposeFC (writer (enc, b))) -- | Attach a column name to an 'Encode'. This is used for building 'Encode's -- with headers. -- -- Best used with @OverloadedStrings@ named :: BS.Builder -> Encode a -> NameEncode a named name enc = mkNamed enc (pure name) -- | Synonym for 'named'. -- -- Mnemonic: __D__ot colon names __D__ecoders, __E__qual colon names __E__ncoders. (=:) :: BS.Builder -> Encode a -> NameEncode a (=:) = named runNamed :: NameEncode a -> (Encode a, Seq BS.Builder) runNamed = runWriter . getComposeFC . unNamedE -- | Given an optic from @s@ to @a@, Try to use it to build an encode. -- -- @ -- encodeOf :: Iso' s a -> Encode a -> Encode s -- encodeOf :: Lens' s a -> Encode a -> Encode s -- encodeOf :: Prism' s a -> Encode a -> Encode s -- encodeOf :: Traversal' s a -> Encode a -> Encode s -- encodeOf :: Fold s a -> Encode a -> Encode s -- encodeOf :: Getter s a -> Encode a -> Encode s -- @ -- -- This is very useful when you have a prism for each constructor of your type. -- You can define an 'Encode' as follows: -- -- @ -- myEitherEncode :: Encode a -> Encode b -> Encode (Either a b) -- myEitherEncode encA encB = encodeOf _Left encA <> encodeOf _Right encB -- @ -- -- In this example, when the prism lookup returns 'Nothing', the empty encoder -- is returned. This is the 'mempty' for the 'Encode' monoid, so it won't -- add a field to the resulting CSV. This is the behaviour you want for -- combining a collection of prisms. -- -- But this encoder also works with lenses (or weaker optics), which will -- never fail their lookup, in which case it never returns 'mempty'. -- So this actually does the right thing for both sum and product types. encodeOf :: Getting (First a) s a -> Encode a -> Encode s encodeOf g = encodeOfMay g . choose (maybe (Left ()) Right) conquer -- | Like 'encodeOf', but you can handle 'Nothing' however you'd like. -- In 'encodeOf', it is handled by the Encode which does nothing, -- but for example you might like to use 'orEmpty' to encode an empty field. encodeOfMay :: Getting (First a) s a -> Encode (Maybe a) -> Encode s encodeOfMay g x = contramap (preview g) x -- | Encode a 'String' really quickly. -- If the string has quotes in it, they will not be escaped properly, so -- the result maybe not be valid CSV unsafeString :: Encode String unsafeString = unsafeBuilder BS.stringUtf8 -- | Encode 'Data.Text.Text' really quickly. -- If the text has quotes in it, they will not be escaped properly, so -- the result maybe not be valid CSV unsafeText :: Encode T.Text unsafeText = unsafeBuilder (BS.byteString . T.encodeUtf8) -- | Encode ByteString 'Data.ByteString.Builder.Builder' really quickly. -- If the builder builds a string with quotes in it, they will not be escaped -- properly, so the result maybe not be valid CSV unsafeByteStringBuilder :: Encode BS.Builder unsafeByteStringBuilder = unsafeBuilder id -- | Encode a 'Data.ByteString.ByteString' really quickly. -- If the string has quotes in it, they will not be escaped properly, so -- the result maybe not be valid CSV unsafeByteString :: Encode Strict.ByteString unsafeByteString = unsafeBuilder BS.byteString -- | Encode a 'Data.ByteString.Lazy.ByteString' really quickly. -- If the string has quotes in it, they will not be escaped properly, so -- the result maybe not be valid CSV unsafeLazyByteString :: Encode LBS.ByteString unsafeLazyByteString = unsafeBuilder BS.lazyByteString -- | Encode this 'Data.ByteString.ByteString' really quickly every time, ignoring the input. -- If the string has quotes in it, they will not be escaped properly, so -- the result maybe not be valid CSV unsafeConst :: Strict.ByteString -> Encode a unsafeConst b = contramap (pure b) unsafeByteString