module Text.Highlighter.Lexer where

import Control.Monad.Error
import Control.Monad.State
import Prelude hiding (lex)
import Text.Regex.PCRE.Light hiding (compile)
import Text.Regex.PCRE.Light.Char8 (compile)
import qualified Data.ByteString as BS

import Text.Highlighter.Types


data LexerState =
    LexerState
        { lsLexer :: Lexer
        , lsInput :: BS.ByteString
        , lsState :: [TokenMatcher]
        , lsLexed :: [Token]
        }
    deriving Show

type LexerM = ErrorT LexerError (State LexerState)

data LexerError
    = NoMatchFor BS.ByteString
    | OtherLexerError String
    deriving Show

instance Error LexerError where
    noMsg = OtherLexerError "unknown"
    strMsg = OtherLexerError

runLexer :: Lexer -> BS.ByteString -> Either LexerError [Token]
runLexer l s = evalState (runErrorT lex) (LexerState l s [lStart l] [])

lex :: LexerM [Token]
lex = do
    done <- gets (BS.null . lsInput)

    if done
        then gets lsLexed
        else do

    ms <- getState
    ts <- tryAll ms
    modify $ \ls -> ls { lsLexed = lsLexed ls ++ ts }
    lex
  where
    getState = gets (head . lsState)

tryAll :: [Match] -> LexerM [Token]
tryAll [] = do
    i <- gets lsInput
    throwError (NoMatchFor i)
tryAll (AnyOf ms:ms') =
    tryAll (ms ++ ms')
tryAll (m:ms) = do
    atbol <- isBOL
    fs <- gets (lFlags . lsLexer)

    let opts
            | atbol     = [exec_anchored]
            | otherwise = [exec_anchored, exec_notbol]

    i <- gets lsInput
    case match (compile (mRegexp m) fs) i opts of
        Just [] -> do
            nextState (mNextState m)
            return []

        Just (s:ss) -> do
            modify $ \ls -> ls { lsInput = BS.drop (BS.length s) i }

            nextState (mNextState m)

            toTokens (s:ss) (mType m)

        Nothing ->
            tryAll ms `catchError` trySkipping
  where
    trySkipping e = do
        case e of
            NoMatchFor _ ->
                tryAllFirst (m:ms)

            _ -> throwError e

tryAllFirst :: [Match] -> LexerM [Token]
tryAllFirst [] = do
    i <- gets lsInput
    throwError (NoMatchFor i)
tryAllFirst (AnyOf ms:ms') =
    tryAllFirst (ms ++ ms')
tryAllFirst (m:ms) = do
    atbol <- isBOL
    fs <- gets (lFlags . lsLexer)

    let opts
            | atbol     = []
            | otherwise = [exec_notbol]

    i <- gets lsInput
    case match (compile (mRegexp m) fs) i opts of
        Just (s:ss) -> do
            let (skipped, next) = skipFailed i s
            modify $ \ls -> ls { lsInput = next }
            ts <- toTokens (s:ss) (mType m)
            return (Token Error skipped:ts)

        _ -> tryAllFirst ms

    
isBOL :: LexerM Bool
isBOL = do
    ld <- gets lsLexed
    case ld of
        [] -> return True
        ts ->
            let nonempty = dropWhile (BS.null . tText) (reverse ts)
            in
                case nonempty of
                    [] -> return True
                    (t:_) -> return (BS.last (tText t) == 10)

toTokens :: [BS.ByteString] -> TokenType -> LexerM [Token]
toTokens (s:_) (Using l) = do
    either throwError return (runLexer l s)
toTokens (_:ss) (ByGroups ts) = liftM concat $ zipWithM (\s t -> toTokens [s] t) ss ts
toTokens (s:_) t = return [Token t s]
toTokens [] _ = return []

-- Given the starting point, return the text preceding and after
-- the failing regexp match
skipFailed :: BS.ByteString -> BS.ByteString -> (BS.ByteString, BS.ByteString)
skipFailed i r
    | r `BS.isPrefixOf` i = (BS.empty, BS.drop (BS.length r) i)
    | otherwise =
        let (pre, next) = skipFailed (BS.tail i) r
        in (BS.cons (BS.head i) pre, next)
            
nextState :: NextState -> LexerM ()
nextState Continue = return ()
nextState Pop =
    modify $ \ls -> ls { lsState = tail (lsState ls) }
nextState (PopNum n) =
    modify $ \ls -> ls { lsState = drop n (lsState ls) }
nextState Push =
    modify $ \ls -> ls { lsState = head (lsState ls) : lsState ls }
nextState (GoTo n) =
    modify $ \ls -> ls { lsState = n : lsState ls }
nextState (DoAll nss) = mapM_ nextState nss
nextState (Combined nss) =
    modify $ \ls -> ls { lsState = concat nss : lsState ls }