{-# LANGUAGE TypeOperators #-}
module Frames.Dsv where
import Control.Monad (when)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
import Data.Maybe (isNothing)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Vector as V
import Data.Vinyl
import Data.Vinyl.Functor ((:.), Compose(..))
import Data.Word (Word8)
import Frames (recMaybe, Record)
import Frames.CSV (defaultParser, pipeTableMaybeOpt)
import Frames.CSV (ReadRec(..), Separator, ParserOptions(..))
import Frames.TH (rowGen, RowGen(..), tableTypes')
import qualified HaskellWorks.Data.Dsv.Lazy.Cursor as SVL
import qualified HaskellWorks.Data.Dsv.Lazy.Cursor.Strict as SVLS
import Pipes (MonadIO, Producer, (>->), yield)
import qualified Pipes as P
import qualified Pipes.Prelude as P
import Language.Haskell.TH
rowLoop :: Monad m => SVL.DsvCursor -> Producer [BS.ByteString] m ()
rowLoop c =
if SVL.dsvCursorPosition d > SVL.dsvCursorPosition c && not (SVL.atEnd c)
then do yield (SVLS.getRowListBetween c d dEnd)
rowLoop (SVL.trim d)
else return ()
where nr = SVL.nextRow c
d = SVL.nextPosition nr
dEnd = SVL.atEnd nr
dsvRowsByte :: MonadIO m => FilePath -> Word8 -> Producer [BS.ByteString] m ()
dsvRowsByte fp columnSeparator =
do bs <- P.liftIO (LBS.readFile fp)
rowLoop (SVL.makeCursor columnSeparator bs)
dsvRows' :: MonadIO m => FilePath -> Word8 -> Producer [Text] m ()
dsvRows' fp = (>-> P.map (map T.decodeUtf8)) . dsvRowsByte fp
dsvRowsLatin1' :: MonadIO m => FilePath -> Word8 -> Producer [Text] m ()
dsvRowsLatin1' fp = (>-> P.map (map T.decodeLatin1)) . dsvRowsByte fp
dsvSepErr :: DsvParserOptionsError -> a
dsvSepErr = error . ("DSV separator must be a single character: "++) . show
dsvRows :: MonadIO m => FilePath -> Separator -> Producer [Text] m ()
dsvRows fp = either dsvSepErr (dsvRows' fp) . separatorWord8
dsvRowsLatin1 :: MonadIO m => FilePath -> Separator -> Producer [Text] m ()
dsvRowsLatin1 fp = either dsvSepErr (dsvRowsLatin1' fp) . separatorWord8
data DsvParserOptionsError = SeparatorIsNull
| SeparatorCharIsMoreThanOneByte
| SeparatorIsMoreThanOneChar
deriving (Eq, Show)
separatorWord8 :: Separator -> Either DsvParserOptionsError Word8
separatorWord8 sep =
case T.uncons sep of
Nothing -> Left SeparatorIsNull
Just (h, t)
| T.null t -> let i = fromEnum h
in if i < 256
then Right (fromIntegral i)
else Left SeparatorCharIsMoreThanOneByte
| otherwise -> Left SeparatorIsMoreThanOneChar
readDsvTableMaybeOpt :: (MonadIO m, ReadRec rs, RMap rs)
=> ParserOptions
-> FilePath
-> P.Producer (Rec (Maybe :. ElField) rs) m ()
readDsvTableMaybeOpt opts csvFile =
dsvRows csvFile (columnSeparator opts) >-> pipeTableMaybeOpt opts
{-# INLINE readDsvTableMaybeOpt #-}
readDsvTableOpt :: (MonadIO m, ReadRec rs, RMap rs)
=> ParserOptions -> FilePath -> P.Producer (Record rs) m ()
readDsvTableOpt opts csvFile = readDsvTableMaybeOpt opts csvFile P.>-> go
where go = P.await >>= maybe go (\x -> P.yield x >> go) . recMaybe
{-# INLINE readDsvTableOpt #-}
readDsvTable :: (MonadIO m, ReadRec rs, RMap rs)
=> FilePath -> P.Producer (Record rs) m ()
readDsvTable = readDsvTableOpt defaultParser
{-# INLINE readDsvTable #-}
dsvTableTypes :: String -> FilePath -> DecsQ
dsvTableTypes n fp =
tableTypes' (rowGen fp) { rowTypeName = n
, lineReader = dsvRows fp }