module FastTags.LexerTypes where
import Codec.Binary.UTF8.String (encodeChar)
import Control.Applicative
import Control.Monad.Error
import Control.Monad.State.Strict
import Data.Char
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Word (Word8)
import Control.Monad.EitherK
import FastTags.Token
advanceLine :: Char -> Line -> Line
advanceLine '\n' = increaseLine
advanceLine _ = id
data AlexInput = AlexInput {
aiInput :: Text
, aiPrevChar :: !Char
, aiBytes :: [Word8]
, aiLine :: !Line
, aiTrackPrefixes :: Bool
, aiPrefix :: Text
} deriving (Show, Eq, Ord)
mkAlexInput :: Text -> Bool -> AlexInput
mkAlexInput s trackPrefixes =
AlexInput s' '\n' [] initLine trackPrefixes Text.empty
where
initLine = Line 0
s' = Text.cons '\n' $ Text.snoc (stripBOM s) '\n'
stripBOM :: Text -> Text
stripBOM xs =
fromMaybe xs $
Text.stripPrefix utf8BOM xs <|> Text.stripPrefix utf8BOM' xs
utf8BOM = "\xFFEF"
utf8BOM' = "\xFEFF"
mkSrcPos :: FilePath -> AlexInput -> SrcPos
mkSrcPos filename (AlexInput {aiLine, aiPrefix}) =
SrcPos { posFile = filename, posLine = aiLine, posPrefix = aiPrefix }
updatePrefix :: Char -> AlexInput -> AlexInput
updatePrefix c input@(AlexInput {aiTrackPrefixes, aiPrefix})
| aiTrackPrefixes = input
{ aiPrefix = case c of
'\n' -> Text.empty
_ -> Text.snoc aiPrefix c
}
| otherwise = input
data Context = CtxHaskell | CtxQuasiquoter
deriving (Show, Eq, Ord)
data AlexState = AlexState {
asInput :: AlexInput
, asFilename :: FilePath
, asCode :: !Int
, asCommentDepth :: !Int
, asQuasiquoterDepth :: !Int
, asContextStack :: [Context]
} deriving (Show, Eq, Ord)
mkAlexState :: FilePath -> AlexInput -> AlexState
mkAlexState filename input = AlexState input filename 0 0 0 []
pushContext :: (MonadState AlexState m) => Context -> m ()
pushContext ctx = modify (\s -> s { asContextStack = ctx : asContextStack s })
popContext :: (MonadState AlexState m, MonadError String m) => m Context
popContext = do
cs <- gets asContextStack
case cs of
[] -> throwError "Popping empty context stack"
c : cs' -> do
modify $ \s -> s { asContextStack = cs' }
return c
modifyCommentDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyCommentDepth f = do
depth <- gets asCommentDepth
let depth' = f depth
modify $ \s -> s { asCommentDepth = depth' }
return depth'
modifyQuasiquoterDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyQuasiquoterDepth f = do
depth <- gets asQuasiquoterDepth
let depth' = f depth
modify $ \s -> s { asQuasiquoterDepth = depth' }
return depth'
retrieveToken :: AlexInput -> Int -> Text
retrieveToken (AlexInput {aiInput}) len = Text.take len aiInput
type AlexM = EitherKT String (State AlexState)
type AlexMConstraint m = (MonadError String m, MonadState AlexState m)
runAlexM :: FilePath -> Bool -> Text -> AlexM a -> Either String a
runAlexM filename trackPrefixes input action =
evalState (runEitherKT action (return . Left) (return . Right)) s
where
s = mkAlexState filename $ mkAlexInput input trackPrefixes
alexSetInput :: (MonadState AlexState m) => AlexInput -> m ()
alexSetInput input = modify $ \s -> s { asInput = input }
alexSetStartCode :: (MonadState AlexState m) => Int -> m ()
alexSetStartCode code = modify $ \s -> s { asCode = code }
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = aiPrevChar
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte input@(AlexInput {aiInput, aiBytes, aiLine}) =
case aiBytes of
b:bs -> Just (b, input { aiBytes = bs })
[] -> nextChar
where
nextChar = case Text.uncons aiInput of
Nothing -> Nothing
Just (c, cs) -> encode (fromMaybe c $ fixChar c) cs
encode c cs =
case encodeChar c of
b:bs -> Just (b, updatePrefix c $ input')
where
input' = input { aiInput = cs
, aiBytes = bs
, aiPrevChar = c
, aiLine = advanceLine c aiLine
}
[] -> error "alexGetByte: should not happen - utf8 encoding of\
\ a character is empty"
fixChar :: Char -> Maybe Char
fixChar '→' = Nothing
fixChar '∷' = Nothing
fixChar '⇒' = Nothing
fixChar '∀' = Nothing
fixChar c
| c <= '\x7f' = Nothing
| otherwise
= case generalCategory c of
UppercaseLetter -> Just upper
LowercaseLetter -> Just lower
TitlecaseLetter -> Just upper
ModifierLetter -> Just suffix
OtherLetter -> Just lower
DecimalNumber -> Just digit
OtherNumber -> Just digit
ConnectorPunctuation -> Just symbol
DashPunctuation -> Just symbol
OtherPunctuation -> Just symbol
MathSymbol -> Just symbol
CurrencySymbol -> Just symbol
ModifierSymbol -> Just symbol
OtherSymbol -> Just symbol
Space -> Just space
_other -> Nothing
where
space = '\x01'
upper = '\x02'
lower = '\x03'
symbol = '\x04'
digit = '\x05'
suffix = '\x06'