module Parsimony.Stream
( Token(..), Stream(..)
, ASCII, UTF8, ascii, utf8
) where
import Parsimony.Prim
import Parsimony.Pos
import Parsimony.Error
import qualified Data.ByteString as Strict (ByteString,uncons)
import qualified Data.ByteString.Lazy as Lazy (ByteString,uncons)
import Data.String.UTF8 (UTF8,UTF8Bytes,fromRep,replacement_char)
import qualified Data.String.UTF8 as UTF8 (uncons)
import Data.Word (Word8)
import Numeric (showHex)
class Token token where
updatePos :: token -> SourcePos -> SourcePos
showToken :: token -> String
instance Token Char where
updatePos c p = updatePosChar p c
showToken = show
instance Token Word8 where
updatePos _ p = incSourceColumn p 1
showToken b = "0x" ++ showHex b ""
class Token token => Stream stream token | stream -> token where
getToken :: PrimParser stream token
eof_err :: SourcePos -> Reply s a
eof_err p = Error $ newErrorMessage (UnExpect "end of input") p
genToken :: Token t => (i -> Maybe (t,i)) -> PrimParser i t
genToken unc (State i p) =
case unc i of
Nothing -> eof_err p
Just (t,ts) -> Ok t State { stateInput = ts
, statePos = updatePos t p
}
instance Token a => Stream [a] a where
getToken = genToken (\xs -> case xs of
[] -> Nothing
c : cs -> Just (c,cs))
instance Stream Strict.ByteString Word8 where
getToken = genToken Strict.uncons
instance Stream Lazy.ByteString Word8 where
getToken = genToken Lazy.uncons
newtype ASCII content = ASCII content
ascii :: content -> ASCII content
ascii = ASCII
utf8 :: content -> UTF8 content
utf8 = fromRep
instance Stream a Word8 => Stream (ASCII a) Char where
getToken (State (ASCII buf) p) =
case getToken (State buf p) of
Error err -> Error err
Ok w (State b1 p1) -> Ok (toEnum (fromEnum w)) (State (ASCII b1) p1)
instance Stream (UTF8 [Word8]) Char where
getToken = genTokenChar
instance Stream (UTF8 Strict.ByteString) Char where
getToken = genTokenChar
instance Stream (UTF8 Lazy.ByteString) Char where
getToken = genTokenChar
genTokenChar :: UTF8Bytes stream ix => PrimParser (UTF8 stream) Char
genTokenChar (State i p) =
case UTF8.uncons i of
Just (a,i1)
| a /= replacement_char -> Ok a (State i1 (updatePos a p))
| otherwise -> Error $ newErrorMessage
(Message "invalid UTF8 character") p
Nothing -> eof_err p