{-#LANGUAGE MultiParamTypeClasses, FunctionalDependencies, FlexibleInstances #-}
-- | Typeclasses to describe various aspects of parsable streams and tokens.
-- The standard range of Haskell string-like types ('String', and both the lazy
-- and strict flavors of 'ByteString' and 'Text') is supported already, as well
-- as any 'List', so under normal circumstances, you should not need to touch
-- this module directly.

module Text.Parcom.Stream
where

-- | Typeclass for types that are suitable as source streams. Note that
-- implementing just 'Stream' gives you only a small subset of Parcom\'s
-- features; if you want to implement your own 'Stream' instances, you will
-- most likely also want to implement 'Token' for the corresponding token type,
-- 'Listish' to enable parsers that need to convert to or from lists of tokens,
-- and 'Textish' to enable parsers that work on Unicode text.
class Stream s t | s -> t where
    peek :: s -> t
    atEnd :: s -> Bool
    consume :: s -> s
    pop :: s -> (t, s)
    consume = snd . pop
    pop s = (peek s, consume s)

-- | All lists are instances of 'Stream', and the corresponding token type is
-- the list element type. Obviously, this also includes 'String' ('[Char]') and
-- '[Word8]'
instance Stream [a] a where
    peek = head
    atEnd = null
    consume = tail

-- | This typeclass is pretty much required to do anything useful with Parcom;
-- it is needed for Parcom to detect line endings so that parser errors will
-- report the correct source positions. If you need to parse streams that do
-- not support any meaningful concept of lines, consider implementing a dummy
-- instance, like so:
-- @
-- instance Token Foobar where
--     isLineDelimiter _ = False
-- @
-- This will treat the entire input as a single line.
class Token t where
    isLineDelimiter :: t -> Bool

-- | Unicode characters are valid tokens.
instance Token Char where
    isLineDelimiter '\n' = True
    isLineDelimiter _ = False

-- | List-like types.
class Listish s t | s -> t where
    toList :: s  -> [t]
    fromList :: [t] -> s

-- | Any list is trivially list-like
instance Listish [a] a where
    toList = id
    fromList = id

-- | Enables parsing on a per-character basis rather than per-token. For stream
-- types where the token type is 'Char' already, this is trivial, but for other
-- streams (e.g., bytestrings), some extra processing is required to perform a
-- conversion to Unicode.
class Textish s where
    -- | Character-wise equivalent of 'peek'. Returns a pair, where the first
    -- element is 'Just' the parsed Unicode character, or Nothing on failure,
    -- and the second element is the number of tokens that the character has
    -- consumed. Generally, there are two reasons why parsing may fail:
    -- end-of-input, and a token sequence that does not represent a valid
    -- Unicode character according to the underlying stream's semantics.
    peekChar :: s -> (Maybe Char, Int)

instance Textish [Char] where
    peekChar [] = (Nothing, 0)
    peekChar (c:[]) = (Just c, 1)