{-#LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances #-} -- | Typeclasses to describe various aspects of parsable streams and tokens. -- The standard range of Haskell string-like types ('String', and both the lazy -- and strict flavors of 'ByteString' and 'Text') is supported already, as well -- as any 'List', so under normal circumstances, you should not need to touch -- this module directly. module Text.Parcom.Stream where -- | Typeclass for types that are suitable as source streams. Note that -- implementing just 'Stream' gives you only a small subset of Parcom\'s -- features; if you want to implement your own 'Stream' instances, you will -- most likely also want to implement 'Token' for the corresponding token type, -- 'Listish' to enable parsers that need to convert to or from lists of tokens, -- and 'Textish' to enable parsers that work on Unicode text. class Stream s t | s -> t where peek :: s -> t atEnd :: s -> Bool consume :: s -> s pop :: s -> (t, s) consume = snd . pop pop s = (peek s, consume s) -- | All lists are instances of 'Stream', and the corresponding token type is -- the list element type. Obviously, this also includes 'String' ('[Char]') and -- '[Word8]' instance Stream [a] a where peek = head atEnd = null consume = tail -- | This typeclass is pretty much required to do anything useful with Parcom; -- it is needed for Parcom to detect line endings so that parser errors will -- report the correct source positions. If you need to parse streams that do -- not support any meaningful concept of lines, consider implementing a dummy -- instance, like so: -- @ -- instance Token Foobar where -- isLineDelimiter _ = False -- @ -- This will treat the entire input as a single line. class Token t where isLineDelimiter :: t -> Bool -- | Unicode characters are valid tokens. instance Token Char where isLineDelimiter '\n' = True isLineDelimiter _ = False -- | List-like types. class Listish s t | s -> t where toList :: s -> [t] fromList :: [t] -> s -- | Any list is trivially list-like instance Listish [a] a where toList = id fromList = id -- | Enables parsing on a per-character basis rather than per-token. For stream -- types where the token type is 'Char' already, this is trivial, but for other -- streams (e.g., bytestrings), some extra processing is required to perform a -- conversion to Unicode. class Textish s where -- | Character-wise equivalent of 'peek'. Returns a pair, where the first -- element is 'Just' the parsed Unicode character, or Nothing on failure, -- and the second element is the number of tokens that the character has -- consumed. Generally, there are two reasons why parsing may fail: -- end-of-input, and a token sequence that does not represent a valid -- Unicode character according to the underlying stream's semantics. peekChar :: s -> (Maybe Char, Int) instance Textish [Char] where peekChar [] = (Nothing, 0) peekChar (c:[]) = (Just c, 1)