module Text.Highlighter.Lexer (runLexer) where
import Control.Monad.Except (ExceptT, runExceptT, throwError, catchError)
import Control.Monad.State (State, gets, modify, evalState)
import Text.Regex.PCRE.Light hiding (compile)
import Text.Regex.PCRE.Light.Char8 (compile)
import qualified Data.ByteString as BS
import Data.Sequence (Seq, empty, singleton, (><), viewl, null, ViewL(..))
import Data.Monoid ((<>))
import Control.Applicative ((<$>))
import Data.Foldable (toList, foldr1, mapM_)
import Prelude hiding (lex, foldr1, mapM_, concat, head, drop, tail, reverse, dropWhile, null)
import qualified Prelude as P
import Text.Highlighter.Types
data LexerState =
LexerState
{ lsLexer :: Lexer
, lsInput :: BS.ByteString
, lsState :: [TokenMatcher]
, lsLexed :: (Seq Token)
, lastNotNull :: Bool
}
deriving Show
type LexerM = ExceptT LexerError (State LexerState)
data LexerError
= NoMatchFor BS.ByteString
| OtherLexerError String
deriving Show
runLexer :: Lexer -> BS.ByteString -> Either LexerError [Token]
runLexer l s = toList <$> runLexer' l s
runLexer' :: Lexer -> BS.ByteString -> Either LexerError (Seq Token)
runLexer' l s = evalState (runExceptT lex) (LexerState l s [lStart l] empty True)
lex :: LexerM (Seq Token)
lex = do
done <- gets (BS.null . lsInput)
if done
then gets lsLexed
else do
ms <- getState
ts <- tryAll ms
if null ts || (BS.null . tText . head $ ts)
then modify $ \ls -> ls { lsLexed = lsLexed ls >< ts }
else modify $ \ls -> ls { lsLexed = lsLexed ls >< ts
, lastNotNull = (BS.last . tText . head $ ts) == 10
}
lex
where
getState = gets (P.head . lsState)
isBOL :: LexerM Bool
isBOL = gets lastNotNull
head :: Seq a -> a
head x = let (b :< _) = viewl x
in b
tryAll :: [Match] -> LexerM (Seq Token)
tryAll [] = do
i <- gets lsInput
throwError (NoMatchFor i)
tryAll (AnyOf ms:ms') =
tryAll (ms ++ ms')
tryAll (m:ms) = do
atbol <- isBOL
fs <- gets (lFlags . lsLexer)
let opts
| atbol = [exec_anchored]
| otherwise = [exec_anchored, exec_notbol]
i <- gets lsInput
case match (compile (mRegexp m) fs) i opts of
Just [] -> do
nextState (mNextState m) []
return empty
Just (s:ss) -> do
modify $ \ls -> ls { lsInput = BS.drop (BS.length s) i }
nextState (mNextState m) (s:ss)
toTokens (s:ss) (mType m)
Nothing ->
tryAll ms `catchError` trySkipping
where
trySkipping (NoMatchFor _) = tryAllFirst (m:ms)
trySkipping e = throwError e
tryAllFirst :: [Match] -> LexerM (Seq Token)
tryAllFirst [] = do
i <- gets lsInput
throwError (NoMatchFor i)
tryAllFirst (AnyOf ms:ms') =
tryAllFirst (ms ++ ms')
tryAllFirst (m:ms) = do
atbol <- isBOL
fs <- gets (lFlags . lsLexer)
let opts
| atbol = []
| otherwise = [exec_notbol]
i <- gets lsInput
case match (compile (mRegexp m) fs) i opts of
Just (s:ss) -> do
let (skipped, next) = skipFailed i s
modify $ \ls -> ls { lsInput = next }
ts <- toTokens (s:ss) (mType m)
return . singleton . Token Error $ (skipped <> (tText $ head ts))
_ -> tryAllFirst ms
toTokens :: [BS.ByteString] -> TokenType -> LexerM (Seq Token)
toTokens (s:_) (Using l) = either throwError return (runLexer' l s)
toTokens (_:ss) (ByGroups ts) = foldr1 (><) <$> mapM (\(s,t) -> toTokens [s] t) (P.zip ss ts)
toTokens (s:_) t = return $ singleton $ Token t s
toTokens [] _ = return empty
skipFailed :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString)
skipFailed i r
| r `BS.isPrefixOf` i = (BS.empty, BS.drop (BS.length r) i)
| otherwise =
let (pre, next) = skipFailed (BS.tail i) r
in (BS.cons (BS.head i) pre, next)
nextState :: NextState -> [BS.ByteString] -> LexerM ()
nextState Continue _ = return ()
nextState Pop _ =
modify $ \ls -> ls { lsState = P.tail (lsState ls) }
nextState (PopNum n) _ =
modify $ \ls -> ls { lsState = P.drop n (lsState ls) }
nextState Push _ =
modify $ \ls -> ls { lsState = P.head (lsState ls) : lsState ls }
nextState (GoTo n) _ =
modify $ \ls -> ls { lsState = n : lsState ls }
nextState (CapturesTo f) cs =
modify $ \ls -> ls { lsState = f (map fromBS cs) : lsState ls }
where
fromBS = map (toEnum . fromEnum) . BS.unpack
nextState (DoAll nss) cs = mapM_ (flip nextState cs) nss
nextState (Combined nss) _ =
modify $ \ls -> ls { lsState = P.concat nss : lsState ls }