-- | Functions for working with CSV files.
--
-- Decoding turns lazy CSV bytes into an 'RFrame'; encoding turns an 'RFrame'
-- back into lazy CSV bytes. Both directions come in a "with header" and a
-- "without header" flavour.
module Analyze.Csv where

import Analyze.Conversions (projectRows)
import Analyze.RFrame (RFrame (..), RFrameUpdate (..), empty, fromUpdate)
import Control.Monad.Catch (Exception, MonadThrow (..))
import qualified Data.Binary.Builder as B
import qualified Data.ByteString.Lazy as LBS
import qualified Data.Csv as C
import qualified Data.Csv.Builder as CB
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Typeable (Typeable)
import qualified Data.Vector as V

-- | Exception to wrap Cassava error strings.
--
-- The payload is the error message produced by Cassava's decoder, unchanged.
data CsvError = CsvError String
  deriving (Eq, Show, Typeable)

instance Exception CsvError

-- | Decode CSV bytes as an 'RFrame' with a header row.
--
-- The header names are decoded as UTF-8 and used as the frame keys; the
-- parsed rows are then handed to 'projectRows' together with those keys.
--
-- Throws 'CsvError' (via 'throwM') when Cassava cannot parse the input. Any
-- failure raised by 'projectRows' is propagated in @m@ as-is.
decodeWithHeader :: MonadThrow m => LBS.ByteString -> m (RFrame Text Text)
decodeWithHeader bs =
  case C.decodeByName bs of
    Left err -> throwM (CsvError err)
    Right (header, rows) ->
      -- NOTE(review): 'decodeUtf8' raises an impure exception on invalid
      -- UTF-8 in a header name instead of a 'CsvError' -- confirm whether
      -- callers need that reported through 'MonadThrow'.
      projectRows (decodeUtf8 <$> header) rows

-- | Decode CSV bytes as an 'RFrame' without a header row.
--
-- The frame keys are the column indices (@0, 1, ...@) of the first record.
-- Input that contains no records yields the 'empty' frame.
--
-- Throws 'CsvError' (via 'throwM') when Cassava cannot parse the input. Any
-- failure raised by 'fromUpdate' is propagated in @m@ as-is.
decodeWithoutHeader :: MonadThrow m => LBS.ByteString -> m (RFrame Int Text)
decodeWithoutHeader bs =
  case C.decode C.NoHeader bs of
    Left err -> throwM (CsvError err)
    Right rows ->
      -- Look at the first record totally instead of 'V.null' + 'V.head'.
      case rows V.!? 0 of
        Nothing -> return empty
        Just firstRow ->
          -- 'V.imap const' replaces every cell by its own column index.
          let ks = V.imap const firstRow
          in fromUpdate (RFrameUpdate ks rows)

-- | Encode an 'RFrame' as CSV bytes with a header row.
--
-- The output is the header record (the frame keys, UTF-8 encoded) followed
-- by one CSV record per row of the frame.
encodeWithHeader :: RFrame Text Text -> LBS.ByteString
encodeWithHeader (RFrame ks _ vs) =
  let header = CB.encodeHeader (encodeUtf8 <$> ks)
      records = foldMap (CB.encodeRecord . (encodeUtf8 <$>)) vs
  -- Serialize header AND records; emitting only the header would silently
  -- drop every data row.
  in B.toLazyByteString (header `mappend` records)

-- | Encode an 'RFrame' as CSV bytes without header row.
--
-- Only the row data is written: one CSV record per row, each cell UTF-8
-- encoded. The frame keys are ignored.
encodeWithoutHeader :: RFrame k Text -> LBS.ByteString
encodeWithoutHeader (RFrame _ _ vs) =
  let records = foldMap (CB.encodeRecord . (encodeUtf8 <$>)) vs
  in B.toLazyByteString records