{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE OverloadedStrings #-}
module FastTags.LexerTypes where
import Codec.Binary.UTF8.String (encodeChar)
import Control.Applicative
import Control.Monad.State.Strict
import Data.Char
import Data.IntSet (IntSet)
import qualified Data.IntSet as IntSet
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Word (Word8)
import Control.Monad.EitherK
import FastTags.Token
import qualified FastTags.Util as Util
{-# INLINE advanceLine #-}
advanceLine :: Char -> Line -> Line
advanceLine '\n' = increaseLine
advanceLine _ = id
countInputSpace :: AlexInput -> Int -> Int
countInputSpace input len =
countSpace $ Text.take len $ aiInput input
where
countSpace :: Text -> Int
countSpace = Text.foldl' inc 0
where
inc acc ' ' = acc + 1
inc acc '\t' = acc + 8
inc acc '\x01' = acc + 1
inc acc _ = acc
data AlexInput = AlexInput {
aiInput :: Text
, aiPrevChar :: {-# UNPACK #-} !Char
, aiBytes :: [Word8]
, aiLine :: {-# UNPACK #-} !Line
, aiTrackPrefixes :: Bool
, aiPrefix :: Text
, aiAbsPos :: {-# UNPACK #-} !Int
} deriving (Show, Eq, Ord)
mkAlexInput :: Text -> Bool -> AlexInput
mkAlexInput s trackPrefixes = AlexInput
{ aiInput = s'
, aiPrevChar = '\n'
, aiBytes = []
, aiLine = initLine
, aiTrackPrefixes = trackPrefixes
, aiPrefix = Text.empty
, aiAbsPos = initAbsPos
}
where
initLine = Line 0
initAbsPos = -1
s' = Text.cons '\n' $ Text.snoc (stripBOM s) '\n'
stripBOM :: Text -> Text
stripBOM xs =
fromMaybe xs $
Text.stripPrefix utf8BOM xs <|> Text.stripPrefix utf8BOM' xs
utf8BOM = "\xFFEF"
utf8BOM' = "\xFEFF"
mkSrcPos :: FilePath -> AlexInput -> SrcPos
mkSrcPos filename (AlexInput {aiLine, aiPrefix}) =
SrcPos { posFile = filename, posLine = aiLine, posPrefix = aiPrefix }
updatePrefix :: Char -> AlexInput -> AlexInput
updatePrefix c input@(AlexInput {aiTrackPrefixes, aiPrefix})
| aiTrackPrefixes = input
{ aiPrefix = case c of
'\n' -> Text.empty
_ -> Text.snoc aiPrefix c
}
| otherwise = input
data Context = CtxHaskell | CtxQuasiquoter
deriving (Show, Eq, Ord)
data AlexState = AlexState {
asInput :: AlexInput
, asCode :: {-# UNPACK #-} !Int
, asCommentDepth :: {-# UNPACK #-} !Int
, asQuasiquoterDepth :: {-# UNPACK #-} !Int
, asIndentationSize :: {-# UNPACK #-} !Int
, asContextStack :: [Context]
, asPositionsOfQuasiQuoteEnds :: Maybe IntSet
} deriving (Show, Eq, Ord)
mkAlexState :: AlexInput -> AlexState
mkAlexState input = AlexState
{ asInput = input
, asCode = 0
, asCommentDepth = 0
, asQuasiquoterDepth = 0
, asIndentationSize = 0
, asContextStack = []
, asPositionsOfQuasiQuoteEnds = Nothing
}
{-# INLINE pushContext #-}
pushContext :: (MonadState AlexState m) => Context -> m ()
pushContext ctx = modify (\s -> s { asContextStack = ctx : asContextStack s })
{-# INLINE modifyCommentDepth #-}
modifyCommentDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyCommentDepth f = do
depth <- gets asCommentDepth
let depth' = f depth
modify $ \s -> s { asCommentDepth = depth' }
return depth'
{-# INLINE modifyQuasiquoterDepth #-}
modifyQuasiquoterDepth :: (MonadState AlexState m) => (Int -> Int) -> m Int
modifyQuasiquoterDepth f = do
depth <- gets asQuasiquoterDepth
let depth' = f depth
modify $ \s -> s { asQuasiquoterDepth = depth' }
return depth'
retrieveToken :: AlexInput -> Int -> Text
retrieveToken (AlexInput {aiInput}) len = Text.take len aiInput
{-# INLINE addIndentationSize #-}
addIndentationSize :: (MonadState AlexState m) => Int -> m ()
addIndentationSize x =
modify (\s -> s { asIndentationSize = x + asIndentationSize s })
data QQEndsState = QQEndsState
{ qqessPos :: {-# UNPACK #-} !Int
, qqessMap :: !IntSet
, qqessPrevChar :: {-# UNPACK #-} !Char
}
calculateQuasiQuoteEnds :: Int -> Text -> IntSet
calculateQuasiQuoteEnds startPos =
qqessMap . Text.foldl' combine (QQEndsState startPos IntSet.empty '\n')
where
combine :: QQEndsState -> Char -> QQEndsState
combine QQEndsState{qqessPos, qqessMap, qqessPrevChar} c = QQEndsState
{ qqessPos = qqessPos + 1
, qqessMap =
case (qqessPrevChar, c) of
('|', ']') -> IntSet.insert qqessPos qqessMap
(_, '⟧') -> IntSet.insert qqessPos qqessMap
_ -> qqessMap
, qqessPrevChar = c
}
type AlexM = EitherKT String (State AlexState)
runAlexM :: Bool -> Text -> AlexM a -> Either String a
runAlexM trackPrefixes input action =
evalState (runEitherKT action (return . Left) (return . Right)) s
where
s = mkAlexState $ mkAlexInput input trackPrefixes
{-# INLINE alexSetInput #-}
alexSetInput :: (MonadState AlexState m) => AlexInput -> m ()
alexSetInput input = modify $ \s -> s { asInput = input }
{-# INLINE alexSetStartCode #-}
alexSetStartCode :: (MonadState AlexState m) => Int -> m ()
alexSetStartCode code = modify $ \s -> s { asCode = code }
alexInputPrevChar :: AlexInput -> Char
alexInputPrevChar = aiPrevChar
alexGetByte :: AlexInput -> Maybe (Word8, AlexInput)
alexGetByte input@(AlexInput {aiInput, aiBytes, aiLine, aiAbsPos}) =
case aiBytes of
b:bs -> Just (b, input { aiBytes = bs })
[] -> nextChar
where
nextChar = case Text.uncons aiInput of
Nothing -> Nothing
Just (c, cs) -> encode (fromMaybe c $ fixChar c) cs
encode c cs =
case encodeChar c of
b:bs -> Just (b, updatePrefix c input')
where
input' = input { aiInput = cs
, aiBytes = bs
, aiPrevChar = c
, aiLine = advanceLine c aiLine
, aiAbsPos = aiAbsPos + 1
}
[] -> emptyUtfEncodingError
emptyUtfEncodingError = error
"alexGetByte: should not happen - utf8 encoding of a character is empty"
fixChar :: Char -> Maybe Char
fixChar '→' = Nothing
fixChar '∷' = Nothing
fixChar '⇒' = Nothing
fixChar '∀' = Nothing
fixChar c
| c <= '\x7f' = Nothing
| otherwise
= case generalCategory c of
UppercaseLetter -> Just upper
LowercaseLetter -> Just lower
TitlecaseLetter -> Just upper
ModifierLetter -> Just suffix
OtherLetter -> Just lower
DecimalNumber -> Just digit
OtherNumber -> Just digit
Space -> Just space
other ->
if Util.isSymbolCharacterCategory other
then Just symbol
else Nothing
where
space = '\x01'
upper = '\x02'
lower = '\x03'
symbol = '\x04'
digit = '\x05'
suffix = '\x06'