module Language.Lexer.Applicative (tokens, LexicalError(..)) where
import Text.Regex.Applicative
import Data.Loc
import Data.List
import Data.Typeable (Typeable)
import Control.Exception
annotate
:: String
-> String
-> [(Char, Pos, Pos)]
annotate src s = snd $ mapAccumL f (startPos src, startPos src) s
where
f (pos, prev_pos) ch =
let pos' = advancePos pos ch
in pos' `seq` ((pos', pos), (ch, pos, prev_pos))
data LexicalError = LexicalError !Pos
deriving Typeable
instance Show LexicalError where
show (LexicalError pos) = "Lexical error at " ++ displayPos pos
instance Exception LexicalError
tokens
:: forall token.
RE Char token
-> RE Char ()
-> String
-> String
-> [L token]
tokens pToken pJunk src = go . annotate src
where
go l = case l of
[] -> []
s@((_, pos1, _):_) ->
case findLongestPrefix re s of
Just (v, (_, pos1', _):_) | pos1' == pos1 ->
throw $ LexicalError pos1
Just (Just tok, rest) ->
let
pos2 =
case rest of
(_, _, p):_ -> p
[] -> case last s of (_, p, _) -> p
in L (Loc pos1 pos2) tok : go rest
Just (Nothing, rest) -> go rest
Nothing -> throw $ LexicalError pos1
re :: RE (Char, Pos, Pos) (Maybe token)
re = comap (\(c, _, _) -> c) $ (Just <$> pToken) <|> (Nothing <$ pJunk)
displayPos :: Pos -> String
displayPos (Pos src line col _) =
src ++ (colon . shows line . colon . shows (col+1)) ""
where
colon = (':' :)