{-# LANGUAGE TypeOperators #-}
module Frames.Dsv where
import Control.Monad (when)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import Data.Maybe (isNothing)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Vector as V
import Data.Vinyl
import Data.Vinyl.Functor ((:.), Compose(..))
import Data.Word (Word8)
import Frames (recMaybe, Record)
import Frames.CSV (defaultParser, pipeTableMaybeOpt)
import Frames.CSV (ReadRec(..), Separator, ParserOptions(..))
import Frames.TH (rowGen, RowGen(..), tableTypes')
import qualified HaskellWorks.Data.Dsv.Lazy.Cursor as SVL
import Pipes (MonadIO, Producer, (>->), yield)
import qualified Pipes as P
import qualified Pipes.Prelude as P
import Language.Haskell.TH
rowLoop :: Monad m => SVL.DsvCursor -> Producer [BS.ByteString] m ()
rowLoop c =
if SVL.dsvCursorPosition d > SVL.dsvCursorPosition c && not (SVL.atEnd c)
then do yield (V.toList (SVL.getRowBetweenStrict c d dEnd))
rowLoop (SVL.trim d)
else return ()
where nr = SVL.nextRow c
d = SVL.nextPosition nr
dEnd = SVL.atEnd nr
dsvRowsByte :: MonadIO m => FilePath -> Word8 -> Producer [BS.ByteString] m ()
dsvRowsByte fp columnSeparator =
do bs <- P.liftIO (LBS.readFile fp)
rowLoop (SVL.makeCursor columnSeparator bs)
dsvRows' :: MonadIO m => FilePath -> Word8 -> Producer [Text] m ()
dsvRows' fp = (>-> P.map (map T.decodeUtf8)) . dsvRowsByte fp
dsvRowsLatin1' :: MonadIO m => FilePath -> Word8 -> Producer [Text] m ()
dsvRowsLatin1' fp = (>-> P.map (map T.decodeLatin1)) . dsvRowsByte fp
dsvSepErr :: DsvParserOptionsError -> a
dsvSepErr = error . ("DSV separator must be a single character: "++) . show
dsvRows :: MonadIO m => FilePath -> Separator -> Producer [Text] m ()
dsvRows fp = either dsvSepErr (dsvRows' fp) . separatorWord8
dsvRowsLatin1 :: MonadIO m => FilePath -> Separator -> Producer [Text] m ()
dsvRowsLatin1 fp = either dsvSepErr (dsvRowsLatin1' fp) . separatorWord8
data DsvParserOptionsError = SeparatorIsNull
| SeparatorCharIsMoreThanOneByte
| SeparatorIsMoreThanOneChar
deriving (Eq, Show)
separatorWord8 :: Separator -> Either DsvParserOptionsError Word8
separatorWord8 sep =
case T.uncons sep of
Nothing -> Left SeparatorIsNull
Just (h, t)
| T.null t -> let i = fromEnum h
in if i < 256
then Right (fromIntegral i)
else Left SeparatorCharIsMoreThanOneByte
| otherwise -> Left SeparatorIsMoreThanOneChar
readDsvTableMaybeOpt :: (MonadIO m, ReadRec rs, RMap rs)
=> ParserOptions
-> FilePath
-> P.Producer (Rec (Maybe :. ElField) rs) m ()
readDsvTableMaybeOpt opts csvFile =
dsvRows csvFile (columnSeparator opts) >-> pipeTableMaybeOpt opts
{-# INLINE readDsvTableMaybeOpt #-}
readDsvTableOpt :: (MonadIO m, ReadRec rs, RMap rs)
=> ParserOptions -> FilePath -> P.Producer (Record rs) m ()
readDsvTableOpt opts csvFile = readDsvTableMaybeOpt opts csvFile P.>-> go
where go = P.await >>= maybe go (\x -> P.yield x >> go) . recMaybe
{-# INLINE readDsvTableOpt #-}
readDsvTable :: (MonadIO m, ReadRec rs, RMap rs)
=> FilePath -> P.Producer (Record rs) m ()
readDsvTable = readDsvTableOpt defaultParser
{-# INLINE readDsvTable #-}
dsvTableTypes :: String -> FilePath -> DecsQ
dsvTableTypes n fp =
tableTypes' (rowGen fp) { rowTypeName = n
, lineReader = dsvRows fp }