{-# LANGUAGE RecordWildCards #-} module Data.CSV.LexerUtils where import Control.Monad.State import qualified Data.Text.Lazy as LT import qualified Data.Text.Lazy.Encoding as LT import Data.ByteString (ByteString) import qualified Data.ByteString as B import Data.ByteString.Builder import qualified Data.ByteString.Internal as B (w2c) import qualified Data.ByteString.Lazy as L import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Encoding as T import Data.Word import GHC.Int appendMode :: Action appendMode len bs = do s@LexerState {..} <- get put s { stringBuffer = stringBuffer <> (lazyByteString $ L.take len (alexStr bs)) } pure Nothing endMode :: Action endMode _ _ = do mode <- gets lexerMode case mode of InNormal -> pure Nothing InString -> apply where apply = do buf <- toLazyByteString <$> gets stringBuffer modify $ \s -> s { lexerMode = InNormal , stringBuffer = mempty } pure $ Just (Item buf) data LexerMode = InNormal | InString deriving (Show, Eq) data Token = Item !L.ByteString | Newline | Comma | TokenError !Error deriving (Show, Eq) type Action = Int64 -> AlexInput -> State LexerState (Maybe Token) data AlexInput = AlexInput { alexChar :: {-# UNPACK #-} !Char , alexStr :: !L.ByteString , alexBytePos :: {-# UNPACK #-} !Int } deriving (Show, Eq) data LexerState = LexerState { matchedInput :: {-# UNPACK #-} !AlexInput , lexerMode :: !LexerMode , stringBuffer :: !Builder } item :: Action item inputLength _ = do LexerState {..} <- get pure $ Just $ Item $ L.take inputLength (alexStr matchedInput) token t = \_ _ -> pure (Just t) errorAction :: AlexInput -> State LexerState [Token] errorAction AlexInput {..} = pure [TokenError (LexerError (LT.unpack $ LT.decodeUtf8 alexStr))] data Error = LexerError String | UntermString deriving (Show, Eq) eofAction :: State LexerState [Token] eofAction = do mode <- gets lexerMode pure $ case mode of InString -> [TokenError UntermString] InNormal -> [] alexGetByte :: AlexInput -> Maybe (Word8, AlexInput) alexGetByte AlexInput {..} = case L.uncons alexStr of Nothing -> Nothing Just (c, rest) -> Just (c, AlexInput { alexChar = B.w2c c, alexStr = rest, alexBytePos = alexBytePos+1 }) startString :: Action startString _ _ = Nothing <$ do modify $ \s -> s { lexerMode = InString } alexInputPrevChar :: AlexInput -> Char alexInputPrevChar = alexChar