{-# LANGUAGE ScopedTypeVariables, DeriveDataTypeable #-} -- | For an example, see -- module Language.Lexer.Applicative (tokens, LexicalError(..)) where import Text.Regex.Applicative import Data.Loc import Data.List import Data.Typeable (Typeable) import Control.Exception annotate :: String -- ^ source file name -> String -- ^ contents -> [(Char, Pos, Pos)] -- ^ the character, its position, and the previous position annotate src s = snd $ mapAccumL f (startPos src, startPos src) s where f (pos, prev_pos) ch = let pos' = advancePos pos ch in pos' `seq` ((pos', pos), (ch, pos, prev_pos)) -- | The lexical error exception data LexicalError = LexicalError !Pos deriving Typeable instance Show LexicalError where show (LexicalError pos) = "Lexical error at " ++ displayPos pos instance Exception LexicalError -- | The lexer. -- -- In case of a lexical error, throws the 'LexicalError' exception. -- This may seem impure compared to using 'Either', but it allows to -- consume the token list lazily. -- -- Both token and whitespace regexes consume as many characters as possible -- (the maximal munch rule). When a regex returns without consuming any -- characters, a lexical error is signaled. tokens :: forall token. RE Char token -- ^ regular expression for tokens -> RE Char () -- ^ regular expression for whitespace and comments -> String -- ^ source file name (used in locations) -> String -- ^ source text -> [L token] tokens pToken pJunk src = go . annotate src where go l = case l of [] -> [] s@((_, pos1, _):_) -> case findLongestPrefix re s of -- If the longest match is empty, we have a lexical error Just (v, (_, pos1', _):_) | pos1' == pos1 -> throw $ LexicalError pos1 Just (Just tok, rest) -> let pos2 = case rest of (_, _, p):_ -> p [] -> case last s of (_, p, _) -> p in L (Loc pos1 pos2) tok : go rest Just (Nothing, rest) -> go rest Nothing -> throw $ LexicalError pos1 re :: RE (Char, Pos, Pos) (Maybe token) re = comap (\(c, _, _) -> c) $ (Just <$> pToken) <|> (Nothing <$ pJunk) -- | Format a position displayPos :: Pos -> String displayPos (Pos src line col _) = src ++ (colon . shows line . colon . shows (col+1)) "" where colon = (':' :)