module FastTags.LexerTypes where
import Codec.Binary.UTF8.String (encodeChar)
import Control.Applicative
#if MIN_VERSION_mtl(2,2,0)
import Control.Monad.Except
#else
import Control.Monad.Error
#endif
import Control.Monad.State.Strict
import Data.Char
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Word (Word8)
import Control.Monad.EitherK
import FastTags.Token
import qualified FastTags.Util as Util
advanceLine :: Char -> Line -> Line
advanceLine '\n' = increaseLine
advanceLine _ = id
data AlexInput = AlexInput {
aiInput :: Text
, aiPrevChar :: !Char
, aiBytes :: [Word8]
, aiLine :: !Line
, aiTrackPrefixes :: Bool
, aiPrefix :: Text
} deriving (Show, Eq, Ord)
mkAlexInput :: Text -> Bool -> AlexInput
mkAlexInput s trackPrefixes =
AlexInput s' '\n' [] initLine trackPrefixes Text.empty
where
initLine = Line 0
s' = Text.cons '\n' $ Text.snoc (stripBOM s) '\n'
stripBOM :: Text -> Text
stripBOM xs =
fromMaybe xs $
Text.stripPrefix utf8BOM xs <|> Text.stripPrefix utf8BOM' xs
utf8BOM = "\xFFEF"
utf8BOM' = "\xFEFF"
mkSrcPos :: FilePath -> AlexInput -> SrcPos
mkSrcPos filename (AlexInput {aiLine, aiPrefix}) =
SrcPos { posFile = filename, posLine = aiLine, posPrefix = aiPrefix }
updatePrefix :: Char -> AlexInput -> AlexInput
updatePrefix c input@(AlexInput {aiTrackPrefixes, aiPrefix})
| aiTrackPrefixes = input
{ aiPrefix = case c of
'\n' -> Text.empty
_ -> Text.snoc aiPrefix c
}
| otherwise = input
data Context = CtxHaskell | CtxQuasiquoter
deriving (Show, Eq, Ord)
data AlexState = AlexState {
asInput :: AlexInput
, asFilename :: FilePath
, asCode :: !Int
, asCommentDepth :: !Int
, asQuasiquoterDepth :: !Int
, asContextStack :: [Context]
} deriving (Show, Eq, Ord)
mkAlexState :: FilePath -> AlexInput -> AlexState
mkAlexState filename input = AlexState input filename 0 0 0 []
pushContext :: (MonadState AlexState m) => Context -> m ()
pushContext ctx = modify (\s -> s { asContextStack = ctx : asContextStack s })
popContext :: (MonadState AlexState m, MonadError String m) => m Context
popContext = do
cs <- gets asContextStack
case cs of
[] -> throwError "Popping empty context stack"
c : cs' -> do
modify $ \s -> s { asContextStack = cs' }
return c
modifyCommentDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyCommentDepth f = do
depth <- gets asCommentDepth
let depth' = f depth
modify $ \s -> s { asCommentDepth = depth' }
return depth'
modifyQuasiquoterDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyQuasiquoterDepth f = do
depth <- gets asQuasiquoterDepth
let depth' = f depth
modify $ \s -> s { asQuasiquoterDepth = depth' }
return depth'
retrieveToken :: AlexInput -> Int -> Text
retrieveToken (AlexInput {aiInput}) len = Text.take len aiInput
type AlexM = EitherKT String (State AlexState)
runAlexM :: FilePath -> Bool -> Text -> AlexM a -> Either String a
runAlexM filename trackPrefixes input action =
evalState (runEitherKT action (return . Left) (return . Right)) s
where
s = mkAlexState filename $ mkAlexInput input trackPrefixes
alexSetInput :: (MonadState AlexState m) => AlexInput -> m ()
alexSetInput input = modify $ \s -> s { asInput = input }
alexSetStartCode :: (MonadState AlexState m) => Int -> m ()
alexSetStartCode code = modify $ \s -> s { asCode = code }
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = aiPrevChar
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte input@(AlexInput {aiInput, aiBytes, aiLine}) =
case aiBytes of
b:bs -> Just (b, input { aiBytes = bs })
[] -> nextChar
where
nextChar = case Text.uncons aiInput of
Nothing -> Nothing
Just (c, cs) -> encode (fromMaybe c $ fixChar c) cs
encode c cs =
case encodeChar c of
b:bs -> Just (b, updatePrefix c $ input')
where
input' = input { aiInput = cs
, aiBytes = bs
, aiPrevChar = c
, aiLine = advanceLine c aiLine
}
[] -> emptyUtfEncodingError
emptyUtfEncodingError = error
"alexGetByte: should not happen - utf8 encoding of a character is empty"
fixChar :: Char -> Maybe Char
fixChar '→' = Nothing
fixChar '∷' = Nothing
fixChar '⇒' = Nothing
fixChar '∀' = Nothing
fixChar c
| c <= '\x7f' = Nothing
| otherwise
= case generalCategory c of
UppercaseLetter -> Just upper
LowercaseLetter -> Just lower
TitlecaseLetter -> Just upper
ModifierLetter -> Just suffix
OtherLetter -> Just lower
DecimalNumber -> Just digit
OtherNumber -> Just digit
Space -> Just space
other ->
if Util.isSymbolCharacterCategory other
then Just symbol
else Nothing
where
space = '\x01'
upper = '\x02'
lower = '\x03'
symbol = '\x04'
digit = '\x05'
suffix = '\x06'