{-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE ExistentialQuantification #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FunctionalDependencies #-} module Data.IterIO.Iter (-- * Base types ChunkData(..), Chunk(..), chunk, chunkEOF , Iter(..), CtlCmd, CtlRes(..), CtlArg(..), IterFail(..) , IterR(..), iterF , isIterActive, iterShows, iterShow -- * Execution , run, runI -- * Exception types , mkIterEOF , IterCUnsupp(..) -- * Exception-related functions , throwI, throwEOFI, throwParseI , catchI, tryI, tryFI, tryRI, tryEOFI , finallyI, onExceptionI , tryBI, tryFBI , ifParse, ifNoParse, multiParse -- * Some basic Iters , nullI, data0I, dataI, pureI, chunkI , whileNullI, peekI, atEOFI, ungetI , safeCtlI, ctlI -- * Internal functions , onDone, fmapI , onDoneR, stepR, stepR', runR, fmapR, reRunIter, runIterR , getResid, setResid ) where import Prelude hiding (null) import qualified Prelude import Control.Applicative (Applicative(..), (<$), (<$>)) import Control.Exception (SomeException(..), ErrorCall(..), Exception(..) , try, throw) import Control.Monad import Control.Monad.Fix import Control.Monad.Trans import Data.IORef import Data.Maybe import Data.Monoid import Data.Typeable import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy.Char8 as L8 import System.IO.Error (mkIOError, eofErrorType, isEOFError) import System.IO.Unsafe -- -- Iteratee types and instances -- -- | @ChunkData@ is the class of data types that can be output by an -- enumerator and iterated on with an iteratee. A @ChunkData@ type -- must be a 'Monoid', but must additionally provide a predicate, -- @null@, for testing whether an object is equal to 'mempty'. -- Feeding a @null@ chunk to an iteratee followed by any other chunk -- should have the same effect as just feeding the second chunk. To -- simplify debugging, there is an additional requirement that -- @ChunkData@ be convertable to a String with the @chunkShow@ method. -- -- Note that because the "Prelude" contains a function 'Prelude.null' -- for lists, you may wish to include the import: -- -- > import Prelude hiding (null) -- class (Monoid t) => ChunkData t where null :: t -> Bool chunkShow :: t -> String instance (Show a) => ChunkData [a] where {-# INLINE null #-} null = Prelude.null chunkShow = show instance ChunkData L.ByteString where {-# INLINE null #-} null = L.null chunkShow = show . L8.unpack instance ChunkData S.ByteString where {-# INLINE null #-} null = S.null chunkShow = show . S8.unpack instance ChunkData () where {-# INLINE null #-} null _ = True chunkShow _ = "()" -- | @Chunk@ is a wrapper around a 'ChunkData' type that also includes -- an EOF flag that is 'True' if the data is followed by an -- end-of-file condition. An 'Iter' that receives a @Chunk@ with EOF -- 'True' must return a result (or failure); it is an error to demand -- more data (return 'IterF') after an EOF. data Chunk t = Chunk !t !Bool deriving (Eq, Typeable) instance (ChunkData t) => Show (Chunk t) where showsPrec _ (Chunk t eof) rest = chunkShow t ++ if eof then "+EOF" ++ rest else rest instance Functor Chunk where {-# INLINE fmap #-} fmap f (Chunk t eof) = Chunk (f t) eof instance (ChunkData t) => Monoid (Chunk t) where {-# INLINE mempty #-} mempty = Chunk mempty False {-# INLINABLE mappend #-} mappend ca@(Chunk a eofa) cb@(Chunk b eofb) | eofa = error $ "mappend to EOF: " ++ show ca ++ " `mappend` " ++ show cb | null b = Chunk a eofb -- Just an optimization for case below | otherwise = Chunk (mappend a b) eofb -- | A 'Chunk' is 'null' when its data is 'null' and its EOF flag is -- 'False'. instance (ChunkData t) => ChunkData (Chunk t) where {-# INLINE null #-} null (Chunk t False) = null t null (Chunk _ True) = False chunkShow = show -- | Constructor function that builds a chunk containing data and a -- 'False' EOF flag. chunk :: t -> Chunk t {-# INLINE chunk #-} chunk t = Chunk t False -- | An chunk with 'mempty' data and the EOF flag 'True'. chunkEOF :: (Monoid t) => Chunk t {-# INLINE chunkEOF #-} chunkEOF = Chunk mempty True -- | The basic Iteratee type is @Iter t m a@, where @t@ is the type of -- input (in class 'ChunkData'), @m@ is a monad in which the iteratee -- may execute actions (using the 'MonadTrans' 'lift' method), and @a@ -- is the result type of the iteratee. -- -- Internally, an @Iter@ is a function from an input 'Chunk' to a -- result of type 'IterR'. newtype Iter t m a = Iter { runIter :: Chunk t -> IterR t m a } -- | Builds an 'Iter' that keeps requesting input until it receives a -- non-'null' 'Chunk'. In other words, the 'Chunk' fed to the -- argument function is guaranteed either to contain data or to have -- the EOF flag true (or both). iterF :: (ChunkData t) => (Chunk t -> IterR t m a) -> Iter t m a {-# INLINE iterF #-} iterF f = loop where loop = Iter $ \c -> if null c then IterF $ loop else f c -- | Class of control commands for enclosing enumerators. The class -- binds each control argument type to a unique result type. class (Typeable carg, Typeable cres) => CtlCmd carg cres | carg -> cres -- | The outcome of an 'IterC' request. data CtlRes a = CtlUnsupp -- ^ The request type was not supported by the enumerator. | CtlFail !SomeException -- ^ The request was supported, and executing it caused -- an exception to be thrown. | CtlDone !a -- ^ The result of the control operation. deriving (Typeable) -- | Used when an 'Iter' is issuing a control request to an enclosing -- enumerator. Note that unlike 'IterF' or 'IterM', control requests -- expose the residual data, which is ordinarily fed right back to the -- continuation upon execution of the request. This allows certain -- control operations (such as seek and tell) to flush, check the -- length of, or adjust the residual data. data CtlArg t m a = forall carg cres. (CtlCmd carg cres) => CtlArg !carg (CtlRes cres -> Iter t m a) (Chunk t) -- | Contains information about a failed 'Iter'. Failures of type -- 'IterException' must be caught by 'catchI' (or 'tryI', etc.). -- However, any other type of failure is considered a parse error, and -- will be caught by 'multiParse', 'ifParse', and 'mplus'. data IterFail = IterException !SomeException -- ^ An actual error occured that is not a parse error, -- EOF, etc. | IterExpected [(String, String)] -- ^ List of @(input_seen, input_expected)@ pairs. | IterEOFErr IOError -- ^ An EOF error occurred, either in some IO action -- wrapped by 'liftIO', or in some 'Iter' that called -- 'throwEOFI'. | IterParseErr String -- ^ A miscellaneous parse error occured. | IterMzero -- ^ What you get from 'mzero'. Useful if you don't -- want to specify any information about the failure. deriving (Typeable) instance Show IterFail where showsPrec _ (IterException e) rest = shows e rest showsPrec _ (IterExpected swl) rest = "Input failed to match all expectations\n" ++ fmt swl where fmt [] = rest fmt ((saw, expected):t) = " expected " ++ show expected ++ ", saw " ++ (take 50 $ show saw) ++ "\n" ++ fmt t showsPrec _ (IterEOFErr e) rest = "IterEOFErr: " ++ shows e rest showsPrec _ (IterParseErr e) rest = "IterParseErr: " ++ e ++ rest showsPrec _ IterMzero rest = "IterMZero" ++ rest instance Exception IterFail where {-# INLINE toException #-} toException (IterException e) = e toException (IterEOFErr e) = toException e toException e = SomeException e -- | An @IterR@ is the result of feeding a chunk of data to an 'Iter'. -- An @IterR@ is in one of several states: it may require more input -- ('IterF'), it may wish to execute monadic actions in the -- transformed monad ('IterM'), it may have a control request for an -- enclosing enumerator ('IterC'), it may have produced a result -- ('Done'), or it may have failed ('Fail'). data IterR t m a = IterF !(Iter t m a) -- ^ The iteratee requires more input. | IterM !(m (IterR t m a)) -- ^ The iteratee must execute monadic bind in monad @m@ | IterC !(CtlArg t m a) -- ^ A control request (see 'CtlArg'). | Done a (Chunk t) -- ^ Sufficient input was received; the 'Iter' is -- returning a result of type @a@. In adition, the -- 'IterR' has a 'Chunk' containing any residual -- input that was not consumed in producing the -- result. | Fail !IterFail !(Maybe a) !(Maybe (Chunk t)) -- ^ The 'Iter' failed. If it was an enumerator, the -- target 'Iter' that the enumerator was feeding -- likely has not failed, in which case its current -- state is returned in the @Maybe a@. If it makes -- sense to preserve the state of the input stream -- (which it does for most errors except parse -- errors), then the third parameter includes the -- residual 'Chunk' at the time of the failure. -- | True if an 'IterR' is requesting something from an -- enumerator--i.e., the 'IterR' is not 'Done' or 'Fail'. isIterActive :: IterR t m a -> Bool {-# INLINE isIterActive #-} isIterActive (IterF _) = True isIterActive (IterM _) = True isIterActive (IterC _) = True isIterActive _ = False -- | Show the current state of an 'IterR', prepending it to some -- remaining input (the standard 'ShowS' optimization), when 'a' is in -- class 'Show'. Note that if @a@ is not in 'Show', you can simply -- use the 'shows' function. iterShows :: (ChunkData t, Show a) => IterR t m a -> ShowS iterShows (IterC (CtlArg a _ c)) rest = "IterC " ++ (shows (typeOf a) $ " _ " ++ shows c rest) iterShows (Done a c) rest = "Done " ++ (shows a $ " " ++ shows c rest) iterShows (Fail e a c) rest = "Fail " ++ (shows e $ " (" ++ (shows a $ ") " ++ shows c rest)) iterShows iter rest = shows iter rest -- | Show the current state of an 'Iter' if type @a@ is in the 'Show' -- class. (Otherwise, you can simply use the ordinary 'show' -- function.) iterShow :: (ChunkData t, Show a) => IterR t m a -> String iterShow iter = iterShows iter "" instance (ChunkData t) => Show (IterR t m a) where showsPrec _ (IterF _) rest = "IterF _" ++ rest showsPrec _ (IterM _) rest = "IterM _" ++ rest showsPrec _ (IterC (CtlArg a _ c)) rest = "IterC " ++ (shows (typeOf a) $ " _" ++ shows c rest) showsPrec _ (Done _ c) rest = "Done _ " ++ shows c rest showsPrec _ (Fail e a c) rest = "Fail " ++ (shows e $ (if isJust a then " Just _ " else " Nothing ") ++ shows c rest) iterTc :: TyCon iterTc = mkTyCon "Iter" instance (Typeable t, Typeable1 m) => Typeable1 (Iter t m) where typeOf1 iter = mkTyConApp iterTc [typeOf $ t iter, typeOf1 $ m iter] where t :: Iter t m a -> t; t _ = undefined m :: Iter t m a -> m (); m _ = undefined instance (Typeable t, Typeable1 m, Typeable a) => Typeable (Iter t m a) where typeOf = typeOfDefault instance (Monad m) => Functor (Iter t m) where {-# INLINE fmap #-} fmap = fmapI -- | @fmapI@ is like 'liftM', but differs in one important respect: -- it preserves the failed result of an enumerator (and in fact -- applies the function to the non-failed target 'Iter' state). By -- contrast, 'liftM', which is equivalent to @'liftM' f i = i '>>=' -- 'return' . f@, transforms the @'Maybe' a@ component of all 'Fail' -- states to 'Nothing' because of its use of '>>='. fmapI :: (Monad m) => (a -> b) -> Iter t m a -> Iter t m b {-# INLINE fmapI #-} fmapI = onDone . fmapR -- | Maps the result of an 'IterR' like 'fmap', but only if the -- 'IterR' is no longer active. It is an error to call this function -- on an 'IterR' in the 'IterF', 'IterM', or 'IterC' state. Because -- of this restriction, @fmapR@ does not require the input and output -- 'Monad' types (@m1@ and @m2@) to be the same. fmapR :: (a -> b) -> IterR t m1 a -> IterR t m2 b {-# INLINE fmapR #-} fmapR f (Done a c) = Done (f a) c fmapR f (Fail e a c) = Fail e (fmap f a) c fmapR _ (IterF _) = error "fmapR (IterF)" fmapR _ (IterM _) = error "fmapR (IterM)" fmapR _ (IterC _) = error "fmapR (IterC)" instance (Monad m) => Functor (IterR t m) where fmap = onDoneR . fmapR instance (Monad m) => Applicative (Iter t m) where pure = return (<*>) = ap (*>) = (>>) a <* b = do r <- a; b >> return r instance (Monad m) => Monad (Iter t m) where {-# INLINE return #-} return a = Iter $ Done a {-# INLINE (>>=) #-} -- Because check calls itself and (>>=) recursively, IterF and -- IterM likely cannot be inlined. However, inlining Done and -- Fail (which can occur quite often for parsing failures) in the -- first part of this function seems to give about a 1-2% speedup. m >>= k = Iter $ \c0 -> case runIter m c0 of (Done a c) -> runIter (k a) c (Fail e _ c) -> Fail e Nothing c r -> check r where check (IterM mm) = IterM $ mm >>= return . check check (IterF i) = IterF $ i >>= k check (IterC (CtlArg a n c)) = IterC $ CtlArg a (n >=> k) c check (Fail e _ c) = Fail e Nothing c check (Done a c) = runIter (k a) c fail msg = Iter $ Fail (IterException $ toException $ ErrorCall msg) Nothing . Just instance (ChunkData t, Monad m) => MonadPlus (Iter t m) where {-# INLINE mzero #-} mzero = Iter $ const $ Fail IterMzero Nothing Nothing mplus a b = ifParse a return b instance MonadTrans (Iter t) where {-# INLINE lift #-} lift m = Iter $ \c -> IterM $ m >>= \a -> return $ Done a c -- | The 'Iter' instance of 'MonadIO' handles errors specially. If -- the lifted operation throws an exception, 'liftIO' catches the -- exception and returns it as an 'IterFail' failure. If the -- exception is an 'IOError' satisfying 'isEOFError', then the -- exception is wrapped in the 'IterEOFErr' constructor; otherwise, it -- is wrapped in 'IterException' otherwise. This approach allows -- efficient testing for EOF errors without the need to invoke the -- expensive 'cast' or 'fromException' operations. (Yes @liftIO@ uses -- these expensive operations, but 'Iter's that invoke 'throwEOFI' do -- not.) -- -- One consequence of this exception handling is that with 'Iter', -- unlike with most monad transformers, 'liftIO' is /not/ equivalent -- to some number of nested calls to 'lift'. See the documentation of -- '.|$' for an example. instance (MonadIO m) => MonadIO (Iter t m) where liftIO m = Iter $ \c -> IterM $ liftIO $ do result <- try m case result of Right ok -> return $ Done ok c Left se -> return $ case fromException se of Just e | isEOFError e -> Fail (IterEOFErr e) Nothing (Just c) _ -> Fail (IterException se) Nothing (Just c) -- | This is a generalization of 'fixIO' for arbitrary members of the -- 'MonadIO' class. fixMonadIO :: (MonadIO m) => (a -> m a) -> m a fixMonadIO f = do ref <- liftIO $ newIORef $ throw $ toException $ ErrorCall "fixMonadIO: non-termination" a <- liftIO $ unsafeInterleaveIO $ readIORef ref r <- f a liftIO $ writeIORef ref r return r instance (MonadIO m) => MonadFix (Iter t m) where mfix = fixMonadIO -- -- Core functions -- -- | Feed an EOF to an 'Iter' and return the result. Throws an -- exception if there has been a failure. run :: (ChunkData t, Monad m) => Iter t m a -> m a run i0 = check $ runIter i0 chunkEOF where check (Done a _) = return a check (IterF i) = run i check (IterM m) = m >>= check check (IterC (CtlArg _ n c)) = check $ runIter (n CtlUnsupp) c check (Fail (IterException e) _ _) = throw e check (Fail e _ _) = throw e -- | The equivalent for 'runI' for 'IterR's. runR :: (ChunkData t1, ChunkData t2, Monad m) => IterR t1 m a -> IterR t2 m a {-# INLINABLE runR #-} runR (Done a _) = Done a mempty runR (IterF i) = runR $ runIter i chunkEOF runR (IterM m) = IterM $ liftM runR m runR (IterC (CtlArg _ n c)) = runR $ runIter (n CtlUnsupp) c runR (Fail e a c) = Fail e a $ mempty <$ c -- | Runs an 'Iter' from within a different 'Iter' monad. If -- successful, @runI iter@ will produce the same result as @'lift' -- ('run' iter)@. However, if @iter@ fails, 'run' throws a -- language-level exception, which cannot be caught within other -- 'Iter' monads. By contrast, @runI@ throws a monadic exception that -- can be caught. In short, use @runI@ in preference to @run@ in -- situations where both are applicable. See a more detailed -- discussion of the same issue with examples in the documentation for -- @'.|$'@ in "Data.IterIO.Inum". runI :: (ChunkData t1, ChunkData t2, Monad m) => Iter t1 m a -> Iter t2 m a {-# INLINABLE runI #-} runI i = Iter $ runIterR (runR $ runIter i chunkEOF) -- -- Exceptions -- -- | Make an 'IterEOFErr' from a String. mkIterEOF :: String -> IterFail mkIterEOF loc = IterEOFErr $ mkIOError eofErrorType loc Nothing Nothing -- | Exception thrown by 'CtlI' when the type of the control request -- is not supported by the enclosing enumerator. data IterCUnsupp = forall carg cres. (CtlCmd carg cres) => IterCUnsupp carg deriving (Typeable) instance Show IterCUnsupp where showsPrec _ (IterCUnsupp carg) rest = "Unsupported control request " ++ shows (typeOf carg) rest instance Exception IterCUnsupp -- -- Exception functions -- -- | Throw an exception from an Iteratee. The exception will be -- propagated properly through nested Iteratees, which will allow it -- to be categorized properly and avoid situations in which resources -- such as file handles are not released. (Most iteratee code does -- not assume the Monad parameter @m@ is in the 'MonadIO' class, and -- hence cannot use 'catch' or @'onException'@ to clean up after -- exceptions.) Use 'throwI' in preference to 'throw' whenever -- possible. -- -- Do not use @throwI@ to throw parse errors or EOF errors. Use -- 'throwEOFI' and 'throwParseI' instead. For performance reasons, -- the 'IterFail' type segregates EOF and parse errors from other -- types of failures. throwI :: (Exception e) => e -> Iter t m a throwI e = Iter $ Fail (IterException $ toException e) Nothing . Just -- | Throw an exception of type 'IterEOF'. This will be interpreted -- by 'mkInum' as an end of file chunk when thrown by the codec. It -- will also be interpreted by 'ifParse' and 'multiParse' as parsing -- failure. If not caught within the 'Iter' monad, the exception will -- be rethrown by 'run' (and hence '|$') as an 'IOError' of type EOF. throwEOFI :: String -> Iter t m a {-# INLINE throwEOFI #-} throwEOFI s = Iter $ Fail (mkIterEOF s) Nothing . Just -- | Throw a miscellaneous parse error (after which input is assumed -- to be unsynchronized and thus is discarded). Parse errors may be -- caught as exception type 'IterFail', but they can also be caught -- more efficiently by the functions 'multiParse', 'ifParse', and -- 'mplus'. throwParseI :: String -> Iter t m a {-# INLINE throwParseI #-} throwParseI s = Iter $ \_ -> Fail (IterParseErr s) Nothing Nothing -- | Run an 'Iter'. Catch any exception it throws (and return the -- failing iter state). Transform successful results with a function. -- -- This function is slightly more general than 'catchI'. For -- instance, we can't implement 'tryI' in terms of just 'catchI'. -- Something like -- -- > tryI iter = catchI (iter >>= return . Right) ... -- -- would remove the possibly unfailed 'Iter' state from failed 'Inum' -- results, because the '>>=' operator has this effect. (I.e., if -- @iter@ is @'Fail' e ('Just' i) c@, the expression @iter >>= return -- . Right@ will be @'Fail' e Nothing c@.) This could be particularly -- bad in cases where the exception is not even of a type caught by -- the 'tryI' expression. -- -- Similarly, trying to implement 'catchI' in terms of 'tryI' doesn't -- quite work. Something like -- -- > catchI iter handler = tryI iter >>= either (uncurry handler) return -- -- would erase state from 'Inum' failures /not/ caught by the handler. genCatchI :: (ChunkData t, Monad m, Exception e) => Iter t m a -- ^ 'Iter' that might throw an exception -> (e -> IterR t m a -> Iter t m b) -- ^ Exception handler -> (a -> b) -- ^ Conversion function for result and 'InumFail' errors. -> Iter t m b genCatchI iter0 handler conv = onDone check iter0 where check (Done a c) = Done (conv a) c check r@(Fail e0 _ _) = case fromException $ toException e0 of Just e -> runIter (handler e $ setResid r mempty) (getResid r) Nothing -> fmap conv r check _ = error "genCatchI" -- | Catch an exception thrown by an 'Iter', including exceptions -- thrown by any 'Inum's fused to the 'Iter' (or applied to it with -- '.|$'). If you wish to catch just errors thrown within 'Inum's, -- see the function @'inumCatch'@ in "Data.IterIO.Inum". -- -- On exceptions, @catchI@ invokes a handler passing it both the -- exception thrown and the state of the failing 'IterR', which may -- contain more information than just the exception. In particular, -- if the exception occured in an 'Inum', the returned 'IterR' will -- also contain the 'IterR' being fed by that 'Inum', which likely -- will not have failed. To avoid discarding this extra information, -- you should not re-throw exceptions with 'throwI'. Rather, you -- should re-throw an exception by re-executing the failed 'IterR' -- with 'reRunIter'. For example, a possible definition of -- 'onExceptionI' is: -- -- @ -- onExceptionI iter cleanup = -- iter \`catchI\` \\('SomeException' _) r -> cleanup >> 'reRunIter' r -- @ -- -- Note that @catchI@ only works for /synchronous/ exceptions, such as -- IO errors (thrown within 'liftIO' blocks), the monadic 'fail' -- operation, and exceptions raised by 'throwI'. It is not possible -- to catch /asynchronous/ exceptions, such as lazily evaluated -- divide-by-zero errors, the 'throw' function, or exceptions raised -- by other threads using @'throwTo'@ if those exceptions might arrive -- anywhere outside of a 'liftIO' call. -- -- @\`catchI\`@ has the default infix precedence (@infixl 9 -- \`catchI\`@), which binds more tightly than any concatenation or -- fusing operators. catchI :: (Exception e, ChunkData t, Monad m) => Iter t m a -- ^ 'Iter' that might throw an exception -> (e -> IterR t m a -> Iter t m a) -- ^ Exception handler, which gets as arguments both the -- exception and the failing 'Iter' state. -> Iter t m a {-# INLINE catchI #-} catchI iter handler = genCatchI iter handler id -- | If an 'Iter' succeeds and returns @a@, returns @'Right' a@. If -- the 'Iter' fails and throws an exception @e@ (of type @e@), returns -- @'Left' (e, r)@ where @r@ is the state of the failing 'Iter'. tryI :: (ChunkData t, Monad m, Exception e) => Iter t m a -> Iter t m (Either (e, IterR t m a) a) {-# INLINE tryI #-} tryI iter = genCatchI iter (curry $ return . Left) Right -- | A version of 'tryI' that catches all exceptions. Instead of -- returning the exception caught, it returns the failing 'IterR' -- (from which you can extract the exception if you really want it). -- The main use of this is for doing some kind of clean-up action, -- then re-throwing the exception with 'reRunIter'. -- -- For example, the following is a possible implementation of 'finallyI': -- -- > finallyI iter cleanup = do -- > er <- tryRI iter -- > cleanup -- > either reRunIter return er tryRI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m (Either (IterR t m a) a) tryRI = onDone check where check (Fail e a c) = Done (Left $ Fail e a $ Just mempty) $ fromMaybe mempty c check r = Right <$> r -- | A variant of 'tryI' that just catches EOF errors. Returns -- 'Nothing' after an EOF error, and 'Just' the result otherwise. -- Should be much faster than trying to catch an EOF error of type -- 'Exception'. tryEOFI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m (Maybe a) tryEOFI = onDone check where check (Fail (IterEOFErr _) _ c) = Done Nothing $ fromMaybe mempty c check r = Just <$> r -- | A varient of 'tryI' that returns the 'IterFail' state rather than -- trying to match a particular exception. tryFI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m (Either IterFail a) tryFI = onDone check where check (Fail e _ c) = Done (Left e) $ fromMaybe mempty c check r = Right <$> r -- | Execute an 'Iter', then perform a cleanup action regardless of -- whether the 'Iter' threw an exception or not. Analogous to the -- standard library function @'finally'@. finallyI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m b -> Iter t m a finallyI iter cleanup = do er <- tryRI iter cleanup >> either reRunIter return er -- | Execute an 'Iter' and perform a cleanup action if the 'Iter' -- threw an exception. Analogous to the standard library function -- @'onException'@. onExceptionI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m b -> Iter t m a onExceptionI iter cleanup = catchI iter $ \(SomeException _) r -> cleanup >> reRunIter r -- | Simlar to 'tryI', but saves all data that has been fed to the -- 'Iter', and rewinds the input if the 'Iter' fails. (The @B@ in -- @tryBI@ stands for \"backtracking\".) Thus, if @tryBI@ returns -- @'Left' exception@, the next 'Iter' to be invoked will see the same -- input that caused the previous 'Iter' to fail. (For this reason, -- it makes no sense ever to call @'resumeI'@ on the 'Iter' you get -- back from @tryBI@, which is why @tryBI@ does not return the failing -- Iteratee the way 'tryI' does.) -- -- Because @tryBI@ saves a copy of all input, it can consume a lot of -- memory and should only be used when the 'Iter' argument is known to -- consume a bounded amount of data. tryBI :: (ChunkData t, Monad m, Exception e) => Iter t m a -> Iter t m (Either e a) tryBI = onDoneInput errToEither where errToEither (Done a c) _ = Done (Right a) c errToEither r@(Fail ie _ _) c = case fromException $ toException ie of Just e -> Done (Left e) c Nothing -> Right <$> r errToEither _ _ = error "tryBI" -- | A variant of 'tryBI' that, also rewinds input on failure, but -- returns the raw 'IterFail' structure, rather than mapping it to a -- particular exception. This is much faster because it requires no -- dynamic casts. However, the same warning applies that @tryFBI@ -- should not be applied to 'Iter's that could take unbounded input. tryFBI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m (Either IterFail a) tryFBI = onDoneInput check where check (Done a c) _ = Done (Right a) c check (Fail e _ _) c = Done (Left e) c check _ _ = error "tryFBI" {- mapIterFail :: (ChunkData t, Monad m) => (IterFail -> IterFail) -> Iter t m a -> Iter t m a mapIterFail f = onDone check where check (Fail e a c) = Fail (f e) a c check r = r -} -- | Run an Iteratee, and if it throws a parse error by calling -- 'expectedI', then combine the exptected tokens with those of a -- previous parse error. combineExpected :: (ChunkData t, Monad m) => IterFail -- ^ Previous parse error -> IterR t m a -- ^ Result of second 'Iter'--if it fails the error -- should be combined with the first error -> IterR t m a combineExpected _ r@(Done _ _) = r combineExpected (IterExpected l1) (Fail (IterExpected l2) _ _) = Fail (IterExpected $ l1 ++ l2) Nothing Nothing combineExpected _ r@(Fail (IterExpected _) _ _) = r combineExpected e _ = Fail e Nothing Nothing -- | Try two Iteratees and return the result of executing the second -- if the first one throws a parse, EOF, or 'mzero' error. Note that -- "Data.IterIO.Parse" defines @'<|>'@ as an infix synonym for this -- function. -- -- The statement @multiParse a b@ is similar to @'ifParse' a return -- b@, but the two functions operate differently. Depending on the -- situation, only one of the two formulations may be correct. -- Specifically: -- -- * @'ifParse' a f b@ works by first executing @a@, saving a copy of -- all input consumed by @a@. If @a@ throws a parse error, the -- saved input is used to backtrack and execute @b@ on the same -- input that @a@ just rejected. If @a@ succeeds, @b@ is never -- run; @a@'s result is fed to @f@, and the resulting action is -- executed without backtracking (so any error thrown within @f@ -- will not be caught by this 'ifParse' expression). -- -- * Instead of saving input, @multiParse a b@ executes both @a@ and -- @b@ concurrently as input chunks arrive. If @a@ throws a parse -- error, then the result of executing @b@ is returned. If @a@ -- either succeeds or throws an exception that is not a parse -- error/EOF/'mzero', then the result of running @a@ is returned. -- -- * With @multiParse a b@, if @b@ returns a value, executes a -- monadic action via 'lift', or issues a control request via -- 'ctlI', then further processing of @b@ will be suspended until -- @a@ experiences a parse error, and thus the behavior will be -- equivalent to @'ifParse' a return b@. -- -- The main restriction on 'ifParse' is that @a@ must not consume -- unbounded amounts of input, or the program may exhaust memory -- saving the input for backtracking. Note that the second argument -- to 'ifParse' (i.e., 'return' in @ifParse a return b@) is a -- continuation for @a@ when @a@ succeeds. -- -- The advantage of @multiParse@ is that it can avoid storing -- unbounded amounts of input for backtracking purposes if both -- 'Iter's consume data. Another advantage is that with an expression -- such as @'ifParse' a f b@, sometimes it is not convenient to break -- the parse target into an action to execute with backtracking (@a@) -- and a continuation to execute without backtracking (@f@). The -- equivalent @multiParse (a >>= f) b@ avoids the need to do this, -- since it does not do backtracking. -- -- However, it is important to note that it is still possible to end -- up storing unbounded amounts of input with @multiParse@. For -- example, consider the following code: -- -- > total :: (Monad m) => Iter String m Int -- > total = multiParse parseAndSumIntegerList (return -1) -- Bad -- -- Here the intent is for @parseAndSumIntegerList@ to parse a -- (possibly huge) list of integers and return their sum. If there is -- a parse error at any point in the input, then the result is -- identical to having defined @total = return -1@. But @return -1@ -- succeeds immediately, consuming no input, which means that @total@ -- must return all left-over input for the next action (i.e., @next@ -- in @total >>= next@). Since @total@ has to look arbitrarily far -- into the input to determine that @parseAndSumIntegerList@ fails, in -- practice @total@ will have to save all input until it knows that -- @parseAndSumIntegerList@ succeeds. -- -- A better approach might be: -- -- @ -- total = multiParse parseAndSumIntegerList ('nullI' >> return -1) -- @ -- -- Here 'nullI' discards all input until an EOF is encountered, so -- there is no need to keep a copy of the input around. This makes -- sense so long as @total@ is the last or only Iteratee run on the -- input stream. (Otherwise, 'nullI' would have to be replaced with -- an Iteratee that discards input up to some end-of-list marker.) -- -- Another approach might be to avoid parsing combinators entirely and -- use: -- -- @ -- total = parseAndSumIntegerList ``catchI`` handler -- where handler \('IterNoParse' _) _ = return -1 -- @ -- -- This last definition of @total@ may leave the input in some -- partially consumed state. This is fine so long as @total@ is the -- last 'Iter' executed on the input stream. Otherwise, before -- throwing the parse error, @parseAndSumIntegerList@ would need to -- ensure the input is at some reasonable boundary point for whatever -- comes next. (The 'ungetI' function is sometimes helpful for this -- purpose.) multiParse :: (ChunkData t, Monad m) => Iter t m a -> Iter t m a -> Iter t m a multiParse a b = Iter $ \c -> check (runIter a c) (runIter b c) where check ra@(Done _ _) _ = ra check (IterF ia) (IterF ib) = IterF $ multiParse ia ib check (IterF ia) rb = IterF $ onDoneInput (\ra c -> check ra (runIterR rb c)) ia check ra rb = stepR ra (flip check rb) $ case ra of (Fail (IterException _) _ _) -> ra (Fail e _ _) -> onDoneR (combineExpected e) rb _ -> error "multiParse" -- | @ifParse iter success failure@ runs @iter@, but saves a copy of -- all input consumed using 'tryFBI'. (This means @iter@ must not -- consume unbounded amounts of input! See 'multiParse' for such -- cases.) If @iter@ succeeds, its result is passed to the function -- @success@. If @iter@ throws a parse error (with 'throwParseI'), -- throws an EOF error (with 'throwEOFI'), or executes 'mzero', then -- @failure@ is executed with the input re-wound (so that @failure@ is -- fed the same input that @iter@ was). If @iter@ throws any other -- type of exception, @ifParse@ passes the exception back and does not -- execute @failure@. -- -- See "Data.IterIO.Parse" for a discussion of this function and the -- related infix operator @\\/@ (which is a synonym for 'ifNoParse'). ifParse :: (ChunkData t, Monad m) => Iter t m a -- ^ Iteratee @iter@ to run with backtracking -> (a -> Iter t m b) -- ^ @success@ function -> Iter t m b -- ^ @failure@ action -> Iter t m b -- ^ result ifParse iter yes no = tryFBI iter >>= check where check (Right a) = yes a check (Left (IterException e)) = throwI e check (Left e) = onDone (combineExpected e) no -- | @ifNoParse@ is just 'ifParse' with the second and third arguments -- reversed. ifNoParse :: (ChunkData t, Monad m) => Iter t m a -> Iter t m b -> (a -> Iter t m b) -> Iter t m b {-# INLINE ifNoParse #-} ifNoParse iter no yes = ifParse iter yes no -- -- Some super-basic Iteratees -- -- | Sinks data like @\/dev\/null@, returning @()@ on EOF. nullI :: (Monad m, ChunkData t) => Iter t m () nullI = Iter $ \(Chunk _ eof) -> if eof then Done () chunkEOF else IterF nullI -- | Returns a non-empty amount of input data if there is any input -- left. Returns 'mempty' on an EOF condition. data0I :: (ChunkData t) => Iter t m t {-# INLINE data0I #-} data0I = iterF $ \(Chunk d eof) -> Done d (Chunk mempty eof) -- | Like 'data0I', but always returns non-empty data. Throws an -- exception on an EOF condition. dataI :: (ChunkData t) => Iter t m t {-# INLINE dataI #-} dataI = iterF nextChunk where nextChunk c@(Chunk d True) | null d = Fail eoferr Nothing (Just c) nextChunk (Chunk d eof) = Done d (Chunk mempty eof) eoferr = mkIterEOF "dataI" -- | A variant of 'data0I' that reads the whole input up to an EOF and -- returns it. pureI :: (Monad m, ChunkData t) => Iter t m t pureI = do peekI nullI; Iter $ \(Chunk t _) -> Done t chunkEOF -- | Returns the next 'Chunk' that either contains non-'null' data or -- has the EOF bit set. chunkI :: (Monad m, ChunkData t) => Iter t m (Chunk t) {-# INLINE chunkI #-} chunkI = iterF $ \c@(Chunk _ eof) -> Done c (Chunk mempty eof) -- | Keep running an 'Iter' until either its output is not 'null' or -- we have reached EOF. Return the the `Iter`'s value on the last -- (i.e., usually non-'null') iteration. whileNullI :: (ChunkData tIn, ChunkData tOut, Monad m) => Iter tIn m tOut -> Iter tIn m tOut whileNullI iter = loop where loop = do buf <- iter if null buf then do eof <- atEOFI if eof then return buf else loop else return buf -- | Runs an 'Iter' then rewinds the input state, so that the effect -- is to parse lookahead data. (See 'tryBI' if you want to rewind the -- input only when the 'Iter' fails.) peekI :: (ChunkData t, Monad m) => Iter t m a -> Iter t m a peekI = onDoneInput setResid -- | Does not actually consume any input, but returns 'True' if there -- is no more input data to be had. atEOFI :: (Monad m, ChunkData t) => Iter t m Bool atEOFI = iterF $ \c@(Chunk t _) -> Done (null t) c -- | Place data back onto the input stream, where it will be the next -- data consumed by subsequent 'Iter's. ungetI :: (ChunkData t) => t -> Iter t m () {-# INLINE ungetI #-} ungetI t = Iter $ \c -> Done () (mappend (chunk t) c) -- | Issue a control request. Returns 'CtlUnsupp' if the request type -- is unsupported. Otherwise, returns 'CtlDone' with the result if -- the request succeeds, or return @'CtlFail'@ if the request type is -- supported but attempting to execute the request caused an -- exception. safeCtlI :: (CtlCmd carg cres, Monad m) => carg -> Iter t m (CtlRes cres) safeCtlI carg = Iter $ IterC . CtlArg carg return -- | Issue a control request and return the result. Throws an -- exception of type 'IterCUnsupp' if the operation type was not -- supported by an enclosing enumerator. ctlI :: (CtlCmd carg cres, ChunkData t, Monad m) => carg -> Iter t m cres ctlI carg = do res <- safeCtlI carg case res of CtlUnsupp -> throwI $ IterCUnsupp carg CtlFail e -> throwI e CtlDone cres -> return cres -- -- Iter manipulation functions -- -- | A variant of 'stepR' that only works for the 'IterF' and 'IterC' -- states, not the 'IterM' state. (Because of this additional -- restriction, the input and output 'Monad' types @m1@ and @m2@ do -- not need to be the same.) stepR' :: IterR t m1 a -- ^ The 'IterR' that needs to be stepped. -> (IterR t m1 a -> IterR t m2 b) -- ^ Transformation function if the 'IterR' is in the 'IterF' -- or 'IterC' state. -> IterR t m2 b -- ^ Fallback if the 'IterR' is no longer active. -> IterR t m2 b {-# INLINE stepR' #-} stepR' (IterF (Iter i)) f _ = IterF $ Iter $ f . i stepR' (IterC (CtlArg a n c)) f _ = IterC $ CtlArg a (Iter . (f .) . runIter . n) c stepR' (IterM _) _ _ = error "stepR' (IterM)" stepR' _ _ notActive = notActive -- | Step an active 'IterR' (i.e., one in the 'IterF', 'IterM', or -- 'IterC' state) to its next state, and pass the result through a -- function. stepR :: (Monad m) => IterR t m a -- ^ The 'Iter' that needs to be stepped -> (IterR t m a -> IterR t m b) -- ^ Function to pass the 'Iter' to after stepping it. -> IterR t m b -- ^ Fallback if the 'Iter' can no longer be stepped -> IterR t m b {-# INLINE stepR #-} stepR (IterM m) f _ = IterM $ liftM f m stepR r f notActive = stepR' r f notActive -- | The equivalent of 'onDone' for 'IterR's. onDoneR :: (Monad m) => (IterR t m a -> IterR t m b) -> IterR t m a -> IterR t m b {-# INLINE onDoneR #-} onDoneR f = check where check r = stepR r check $ f r -- | Run an 'Iter' until it enters the 'Done' or 'Fail' state, then -- use a function to transform the 'IterR'. {-# INLINE onDone #-} onDone :: (Monad m) => (IterR t m a -> IterR t m b) -> Iter t m a -> Iter t m b onDone f i = Iter $ onDoneR f . runIter i -- | Like 'onDone', but also keeps a copy of all input consumed. (The -- residual input on the 'IterR' returned will be a suffix of the -- input returned.) onDoneInput :: (ChunkData t, Monad m) => (IterR t m a -> Chunk t -> IterR t m b) -> Iter t m a -> Iter t m b {-# INLINABLE onDoneInput #-} onDoneInput f = Iter . next id where next acc iter c = let check (IterF i) = IterF $ Iter $ next (acc . mappend c) i check r = stepR r check $ f r (acc c) in check $ runIter iter c -- | Get the residual data for an 'IterR' that is in no longer active -- or that is in the 'IterC' state. (It is an error to call this -- function on an 'IterR' in the 'IterF' or 'IterM' state.) getResid :: (ChunkData t) => IterR t m a -> Chunk t {-# INLINABLE getResid #-} getResid (Done _ c) = c getResid (Fail _ _ c) = fromMaybe mempty c getResid (IterC (CtlArg _ _ c)) = c getResid (IterF _) = error "getResid (IterF)" getResid (IterM _) = error "getResid (IterM)" -- | Set residual data for an 'IterR' that is not active. (It is an -- error to call this on an 'IterR' in the 'Done', 'IterM', or 'IterC' -- states.) setResid :: IterR t1 m1 a -> Chunk t2 -> IterR t2 m2 a {-# INLINABLE setResid #-} setResid (Done a _) = Done a setResid (Fail e a _) = Fail e a . Just setResid (IterF _) = error "setResid (IterF)" setResid (IterM _) = error "setResid (IterM)" setResid (IterC _) = error "setResid (IterC)" -- | Feed more input to an 'Iter' that has already been run (and hence -- is already an 'IterR'). In the event that the 'IterR' is -- requesting more input (i.e., is in the 'IterF' state), this is -- straight forward. However, if the 'Iter' is in some other state -- such as 'IterM', this function needs to save the input until such -- time as the 'IterR' is stepped to a new state (e.g., with 'stepR' -- or 'reRunIter'). runIterR :: (ChunkData t, Monad m) => IterR t m a -> Chunk t -> IterR t m a {-# INLINABLE runIterR #-} runIterR r c = if null c then r else check r where check (Done a c0) = Done a (mappend c0 c) check (IterF i) = runIter i c check (IterM m) = IterM $ liftM check m check (IterC (CtlArg a n c0)) = IterC $ CtlArg a n (mappend c0 c) check (Fail e a c0) = Fail e a $ fmap (`mappend` c) c0 -- | Turn an 'IterR' back into an 'Iter'. reRunIter :: (ChunkData t, Monad m) => IterR t m a -> Iter t m a {-# INLINE reRunIter #-} reRunIter (IterF i) = i reRunIter r = Iter $ runIterR r