{-# LANGUAGE BangPatterns #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE RankNTypes #-} -- | -- Copyright: 2011 Michael Snoyman, 2010 John Millikin -- License: MIT -- -- Consume attoparsec parsers via conduit. -- -- This code was taken from attoparsec-enumerator and adapted for conduits. module Data.Conduit.Attoparsec ( -- * Sink sinkParser , sinkParserEither -- * Conduit , conduitParser , conduitParserEither -- * Types , ParseError (..) , Position (..) , PositionRange (..) -- * Classes , AttoparsecInput ) where import Control.Exception (Exception) import Control.Monad (unless) import qualified Data.ByteString as B import qualified Data.Text as T import qualified Data.Text.Internal as TI import Data.Typeable (Typeable) import Prelude hiding (lines) import qualified Data.Attoparsec.ByteString import qualified Data.Attoparsec.Text import qualified Data.Attoparsec.Types as A import Data.Conduit import Control.Monad.Trans.Resource (MonadThrow, throwM) -- | The context and message from a 'A.Fail' value. data ParseError = ParseError { errorContexts :: [String] , errorMessage :: String , errorPosition :: Position } | DivergentParser deriving (Show, Typeable) instance Exception ParseError data Position = Position { posLine :: {-# UNPACK #-} !Int , posCol :: {-# UNPACK #-} !Int , posOffset :: {-# UNPACK #-} !Int -- ^ @since 1.2.0 } deriving (Eq, Ord) instance Show Position where show (Position l c off) = show l ++ ':' : show c ++ " (" ++ show off ++ ")" data PositionRange = PositionRange { posRangeStart :: {-# UNPACK #-} !Position , posRangeEnd :: {-# UNPACK #-} !Position } deriving (Eq, Ord) instance Show PositionRange where show (PositionRange s e) = show s ++ '-' : show e -- | A class of types which may be consumed by an Attoparsec parser. class AttoparsecInput a where parseA :: A.Parser a b -> a -> A.IResult a b feedA :: A.IResult a b -> a -> A.IResult a b empty :: a isNull :: a -> Bool getLinesCols :: a -> Position -- | Return the beginning of the first input with the length of -- the second input removed. Assumes the second string is shorter -- than the first. stripFromEnd :: a -> a -> a instance AttoparsecInput B.ByteString where parseA = Data.Attoparsec.ByteString.parse feedA = Data.Attoparsec.ByteString.feed empty = B.empty isNull = B.null getLinesCols = B.foldl' f (Position 0 0 0) where f (Position l c o) ch | ch == 10 = Position (l + 1) 0 (o + 1) | otherwise = Position l (c + 1) (o + 1) stripFromEnd b1 b2 = B.take (B.length b1 - B.length b2) b1 instance AttoparsecInput T.Text where parseA = Data.Attoparsec.Text.parse feedA = Data.Attoparsec.Text.feed empty = T.empty isNull = T.null getLinesCols = T.foldl' f (Position 0 0 0) where f (Position l c o) ch | ch == '\n' = Position (l + 1) 0 (o + 1) | otherwise = Position l (c + 1) (o + 1) stripFromEnd (TI.Text arr1 off1 len1) (TI.Text _ _ len2) = TI.text arr1 off1 (len1 - len2) -- | Convert an Attoparsec 'A.Parser' into a 'Sink'. The parser will -- be streamed bytes until it returns 'A.Done' or 'A.Fail'. -- -- If parsing fails, a 'ParseError' will be thrown with 'throwM'. -- -- Since 0.5.0 sinkParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a o m b sinkParser = fmap snd . sinkParserPosErr (Position 1 1 0) -- | Same as 'sinkParser', but we return an 'Either' type instead -- of raising an exception. -- -- Since 1.1.5 sinkParserEither :: (AttoparsecInput a, Monad m) => A.Parser a b -> ConduitT a o m (Either ParseError b) sinkParserEither = (fmap.fmap) snd . sinkParserPos (Position 1 1 0) -- | Consume a stream of parsed tokens, returning both the token and -- the position it appears at. This function will raise a 'ParseError' -- on bad input. -- -- Since 0.5.0 conduitParser :: (AttoparsecInput a, MonadThrow m) => A.Parser a b -> ConduitT a (PositionRange, b) m () conduitParser parser = conduit $ Position 1 1 0 where conduit !pos = await >>= maybe (return ()) go where go x = do leftover x (!pos', !res) <- sinkParserPosErr pos parser yield (PositionRange pos pos', res) conduit pos' {-# SPECIALIZE conduitParser :: MonadThrow m => A.Parser T.Text b -> ConduitT T.Text (PositionRange, b) m () #-} {-# SPECIALIZE conduitParser :: MonadThrow m => A.Parser B.ByteString b -> ConduitT B.ByteString (PositionRange, b) m () #-} -- | Same as 'conduitParser', but we return an 'Either' type instead -- of raising an exception. conduitParserEither :: (Monad m, AttoparsecInput a) => A.Parser a b -> ConduitT a (Either ParseError (PositionRange, b)) m () conduitParserEither parser = conduit $ Position 1 1 0 where conduit !pos = await >>= maybe (return ()) go where go x = do leftover x eres <- sinkParserPos pos parser case eres of Left e -> yield $ Left e Right (!pos', !res) -> do yield $! Right (PositionRange pos pos', res) conduit pos' {-# SPECIALIZE conduitParserEither :: Monad m => A.Parser T.Text b -> ConduitT T.Text (Either ParseError (PositionRange, b)) m () #-} {-# SPECIALIZE conduitParserEither :: Monad m => A.Parser B.ByteString b -> ConduitT B.ByteString (Either ParseError (PositionRange, b)) m () #-} sinkParserPosErr :: (AttoparsecInput a, MonadThrow m) => Position -> A.Parser a b -> ConduitT a o m (Position, b) sinkParserPosErr pos0 p = sinkParserPos pos0 p >>= f where f (Left e) = throwM e f (Right a) = return a {-# INLINE sinkParserPosErr #-} sinkParserPos :: (AttoparsecInput a, Monad m) => Position -> A.Parser a b -> ConduitT a o m (Either ParseError (Position, b)) sinkParserPos pos0 p = sink empty pos0 (parseA p) where sink prev pos parser = await >>= maybe close push where push c | isNull c = sink prev pos parser | otherwise = go False c $ parser c close = go True prev (feedA (parser empty) empty) go end c (A.Done lo x) = do let pos' | end = pos | otherwise = addLinesCols prev pos y = stripFromEnd c lo pos'' = addLinesCols y pos' unless (isNull lo) $ leftover lo pos'' `seq` return $! Right (pos'', x) go end c (A.Fail rest contexts msg) = let x = stripFromEnd c rest pos' | end = pos | otherwise = addLinesCols prev pos pos'' = addLinesCols x pos' in pos'' `seq` return $! Left (ParseError contexts msg pos'') go end c (A.Partial parser') | end = return $! Left DivergentParser | otherwise = pos' `seq` sink c pos' parser' where pos' = addLinesCols prev pos addLinesCols :: AttoparsecInput a => a -> Position -> Position addLinesCols x (Position lines cols off) = lines' `seq` cols' `seq` off' `seq` Position lines' cols' off' where Position dlines dcols doff = getLinesCols x lines' = lines + dlines cols' = (if dlines > 0 then 1 else cols) + dcols off' = off + doff {-# INLINE sinkParserPos #-}