{-# LANGUAGE CPP #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
module GHC.SyntaxHighlighter
( Token (..)
, Loc (..)
, tokenizeHaskell
, tokenizeHaskellLoc )
where
import Control.Monad
import Data.Bits
import Data.List (unfoldr, foldl')
import Data.Text (Text)
import Data.Word (Word64)
import FastString (mkFastString)
import Module (newSimpleUnitId, ComponentId (..))
import SrcLoc
import StringBuffer
import qualified Data.Text as T
import qualified EnumSet as ES
import qualified Lexer as L
data Token
= KeywordTok
| PragmaTok
| SymbolTok
| VariableTok
| ConstructorTok
| OperatorTok
| CharTok
| StringTok
| IntegerTok
| RationalTok
| CommentTok
| SpaceTok
| OtherTok
deriving (Eq, Ord, Enum, Bounded, Show)
data Loc = Loc !Int !Int !Int !Int
deriving (Eq, Ord, Show)
tokenizeHaskell :: Text -> Maybe [(Token, Text)]
tokenizeHaskell input = sliceInputStream input <$> tokenizeHaskellLoc input
sliceInputStream :: Text -> [(Token, Loc)] -> [(Token, Text)]
sliceInputStream input toks = unfoldr sliceOnce (initText' input, toks)
where
sliceOnce (txt, []) = do
(txt', chunk) <- tryFetchRest txt
return ((SpaceTok, chunk), (txt', []))
sliceOnce (txt, tss@((t, l):ts)) =
case tryFetchSpace txt l of
Nothing ->
let (txt', chunk) = fetchSpan txt l
in Just ((t, chunk), (txt', ts))
Just (txt', chunk) ->
Just ((SpaceTok, chunk), (txt', tss))
tokenizeHaskellLoc :: Text -> Maybe [(Token, Loc)]
tokenizeHaskellLoc input =
case L.unP pLexer parseState of
L.PFailed {} -> Nothing
L.POk _ x -> Just x
where
location = mkRealSrcLoc (mkFastString "") 1 1
buffer = stringToStringBuffer (T.unpack input)
parseState = L.mkPStatePure parserFlags buffer location
parserFlags = L.ParserFlags
{ L.pWarningFlags = ES.empty
, L.pExtensionFlags = ES.empty
, L.pThisPackage = newSimpleUnitId (ComponentId (mkFastString ""))
, L.pExtsBitmap = extsBitmap
}
pLexer :: L.P [(Token, Loc)]
pLexer = go
where
go = do
r <- L.lexer False return
case r of
L _ L.ITeof -> return []
_ ->
case fixupToken r of
Nothing -> go
Just x -> (x:) <$> go
fixupToken :: Located L.Token -> Maybe (Token, Loc)
fixupToken (L srcSpan tok) = (classifyToken tok,) <$> srcSpanToLoc srcSpan
srcSpanToLoc :: SrcSpan -> Maybe Loc
srcSpanToLoc (RealSrcSpan rss) =
let start = realSrcSpanStart rss
end = realSrcSpanEnd rss
in if start == end
then Nothing
else Just $ Loc (srcLocLine start)
(srcLocCol start)
(srcLocLine end)
(srcLocCol end)
srcSpanToLoc _ = Nothing
classifyToken :: L.Token -> Token
classifyToken = \case
L.ITas -> KeywordTok
L.ITcase -> KeywordTok
L.ITclass -> KeywordTok
L.ITdata -> KeywordTok
L.ITdefault -> KeywordTok
L.ITderiving -> KeywordTok
L.ITdo -> KeywordTok
L.ITelse -> KeywordTok
L.IThiding -> KeywordTok
L.ITforeign -> KeywordTok
L.ITif -> KeywordTok
L.ITimport -> KeywordTok
L.ITin -> KeywordTok
L.ITinfix -> KeywordTok
L.ITinfixl -> KeywordTok
L.ITinfixr -> KeywordTok
L.ITinstance -> KeywordTok
L.ITlet -> KeywordTok
L.ITmodule -> KeywordTok
L.ITnewtype -> KeywordTok
L.ITof -> KeywordTok
L.ITqualified -> KeywordTok
L.ITthen -> KeywordTok
L.ITtype -> KeywordTok
L.ITwhere -> KeywordTok
L.ITforall _ -> KeywordTok
L.ITexport -> KeywordTok
L.ITlabel -> KeywordTok
L.ITdynamic -> KeywordTok
L.ITsafe -> KeywordTok
L.ITinterruptible -> KeywordTok
L.ITunsafe -> KeywordTok
L.ITstdcallconv -> KeywordTok
L.ITccallconv -> KeywordTok
L.ITcapiconv -> KeywordTok
L.ITprimcallconv -> KeywordTok
L.ITjavascriptcallconv -> KeywordTok
L.ITmdo -> KeywordTok
L.ITfamily -> KeywordTok
L.ITrole -> KeywordTok
L.ITgroup -> KeywordTok
L.ITby -> KeywordTok
L.ITusing -> KeywordTok
L.ITpattern -> KeywordTok
L.ITstatic -> KeywordTok
L.ITstock -> KeywordTok
L.ITanyclass -> KeywordTok
#if MIN_VERSION_ghc(8,6,1)
L.ITvia -> KeywordTok
#endif
L.ITunit -> KeywordTok
L.ITsignature -> KeywordTok
L.ITdependency -> KeywordTok
L.ITrequires -> KeywordTok
L.ITinline_prag {} -> PragmaTok
L.ITspec_prag _ -> PragmaTok
L.ITspec_inline_prag {} -> PragmaTok
L.ITsource_prag _ -> PragmaTok
L.ITrules_prag _ -> PragmaTok
L.ITwarning_prag _ -> PragmaTok
L.ITdeprecated_prag _ -> PragmaTok
L.ITline_prag _ -> PragmaTok
L.ITcolumn_prag _ -> PragmaTok
L.ITscc_prag _ -> PragmaTok
L.ITgenerated_prag _ -> PragmaTok
L.ITcore_prag _ -> PragmaTok
L.ITunpack_prag _ -> PragmaTok
L.ITnounpack_prag _ -> PragmaTok
L.ITann_prag _ -> PragmaTok
L.ITcomplete_prag _ -> PragmaTok
L.ITclose_prag -> PragmaTok
L.IToptions_prag _ -> PragmaTok
L.ITinclude_prag _ -> PragmaTok
L.ITlanguage_prag -> PragmaTok
#if !MIN_VERSION_ghc(8,6,1)
L.ITvect_prag _ -> PragmaTok
L.ITvect_scalar_prag _ -> PragmaTok
L.ITnovect_prag _ -> PragmaTok
#endif
L.ITminimal_prag _ -> PragmaTok
L.IToverlappable_prag _ -> PragmaTok
L.IToverlapping_prag _ -> PragmaTok
L.IToverlaps_prag _ -> PragmaTok
L.ITincoherent_prag _ -> PragmaTok
L.ITctype _ -> PragmaTok
L.ITdotdot -> SymbolTok
L.ITcolon -> SymbolTok
L.ITdcolon _ -> SymbolTok
L.ITequal -> SymbolTok
L.ITlam -> SymbolTok
L.ITlcase -> SymbolTok
L.ITvbar -> SymbolTok
L.ITlarrow _ -> SymbolTok
L.ITrarrow _ -> SymbolTok
L.ITat -> SymbolTok
L.ITtilde -> SymbolTok
#if !MIN_VERSION_ghc(8,6,1)
L.ITtildehsh -> SymbolTok
#endif
L.ITdarrow _ -> SymbolTok
L.ITbang -> SymbolTok
#if MIN_VERSION_ghc(8,6,1)
L.ITstar _ -> SymbolTok
#endif
L.ITbiglam -> SymbolTok
L.ITocurly -> SymbolTok
L.ITccurly -> SymbolTok
L.ITvocurly -> SymbolTok
L.ITvccurly -> SymbolTok
L.ITobrack -> SymbolTok
L.ITopabrack -> SymbolTok
L.ITcpabrack -> SymbolTok
L.ITcbrack -> SymbolTok
L.IToparen -> SymbolTok
L.ITcparen -> SymbolTok
L.IToubxparen -> SymbolTok
L.ITcubxparen -> SymbolTok
L.ITsemi -> SymbolTok
L.ITcomma -> SymbolTok
L.ITunderscore -> SymbolTok
L.ITbackquote -> SymbolTok
L.ITsimpleQuote -> SymbolTok
L.ITminus -> OperatorTok
L.ITdot -> OperatorTok
L.ITvarid _ -> VariableTok
L.ITconid _ -> ConstructorTok
L.ITvarsym _ -> OperatorTok
L.ITconsym _ -> OperatorTok
L.ITqvarid _ -> VariableTok
L.ITqconid _ -> ConstructorTok
L.ITqvarsym _ -> OperatorTok
L.ITqconsym _ -> OperatorTok
L.ITdupipvarid _ -> VariableTok
L.ITlabelvarid _ -> VariableTok
L.ITchar _ _ -> CharTok
L.ITstring _ _ -> StringTok
L.ITinteger _ -> IntegerTok
L.ITrational _ -> RationalTok
L.ITprimchar _ _ -> CharTok
L.ITprimstring _ _ -> StringTok
L.ITprimint _ _ -> IntegerTok
L.ITprimword _ _ -> IntegerTok
L.ITprimfloat _ -> RationalTok
L.ITprimdouble _ -> RationalTok
L.ITopenExpQuote _ _ -> SymbolTok
L.ITopenPatQuote -> SymbolTok
L.ITopenDecQuote -> SymbolTok
L.ITopenTypQuote -> SymbolTok
L.ITcloseQuote _ -> SymbolTok
L.ITopenTExpQuote _ -> SymbolTok
L.ITcloseTExpQuote -> SymbolTok
L.ITidEscape _ -> SymbolTok
L.ITparenEscape -> SymbolTok
L.ITidTyEscape _ -> SymbolTok
L.ITparenTyEscape -> SymbolTok
L.ITtyQuote -> SymbolTok
L.ITquasiQuote _ -> SymbolTok
L.ITqQuasiQuote _ -> SymbolTok
L.ITproc -> KeywordTok
L.ITrec -> KeywordTok
L.IToparenbar _ -> SymbolTok
L.ITcparenbar _ -> SymbolTok
L.ITlarrowtail _ -> SymbolTok
L.ITrarrowtail _ -> SymbolTok
L.ITLarrowtail _ -> SymbolTok
L.ITRarrowtail _ -> SymbolTok
L.ITtypeApp -> SymbolTok
L.ITunknown _ -> OtherTok
L.ITeof -> OtherTok
L.ITdocCommentNext _ -> CommentTok
L.ITdocCommentPrev _ -> CommentTok
L.ITdocCommentNamed _ -> CommentTok
L.ITdocSection _ _ -> CommentTok
L.ITdocOptions _ -> CommentTok
L.ITlineComment _ -> CommentTok
L.ITblockComment _ -> CommentTok
data Text' = Text'
{-# UNPACK #-} !Int
{-# UNPACK #-} !Int
{-# UNPACK #-} !Text
deriving (Show)
initText' :: Text -> Text'
initText' = Text' 1 1
tryFetchSpace :: Text' -> Loc -> Maybe (Text', Text)
tryFetchSpace txt (Loc sl sc _ _) =
let (txt', r) = reachLoc txt sl sc
in if T.null r
then Nothing
else Just (txt', r)
tryFetchRest :: Text' -> Maybe (Text', Text)
tryFetchRest (Text' l c txt) =
if T.null txt
then Nothing
else Just (Text' l c "", txt)
fetchSpan :: Text' -> Loc -> (Text', Text)
fetchSpan txt (Loc _ _ el ec) = reachLoc txt el ec
reachLoc
:: Text'
-> Int
-> Int
-> (Text', Text)
reachLoc txt@(Text' _ _ original) l c =
let chunk = T.unfoldr f txt
f (Text' l' c' s) = do
guard (l' < l || c' < c)
(ch, s') <- T.uncons s
let (l'', c'') = case ch of
'\n' -> (l' + 1, 1)
'\t' -> (l', c' + 8 - ((c' - 1) `rem` 8))
_ -> (l', c' + 1)
return (ch, Text' l'' c'' s')
in (Text' l c (T.drop (T.length chunk) original), chunk)
extsBitmap :: Word64
extsBitmap = mkExtsBitmap enabledExts
{-# NOINLINE extsBitmap #-}
mkExtsBitmap :: [ExtBits] -> Word64
mkExtsBitmap = foldl' f 0
where
f w x = bit (fromEnum x) .|. w
data ExtBits
= FfiBit
| InterruptibleFfiBit
| CApiFfiBit
| ParrBit
| ArrowsBit
| ThBit
| ThQuotesBit
| IpBit
| OverloadedLabelsBit
| ExplicitForallBit
| BangPatBit
| PatternSynonymsBit
| HaddockBit
| MagicHashBit
| RecursiveDoBit
| UnicodeSyntaxBit
| UnboxedTuplesBit
| UnboxedSumsBit
| DatatypeContextsBit
| TransformComprehensionsBit
| QqBit
| InRulePragBit
| RawTokenStreamBit
| SccProfilingOnBit
| HpcBit
| AlternativeLayoutRuleBit
| RelaxedLayoutBit
| NondecreasingIndentationBit
| SafeHaskellBit
| TraditionalRecordSyntaxBit
| ExplicitNamespacesBit
| LambdaCaseBit
| BinaryLiteralsBit
| NegativeLiteralsBit
| HexFloatLiteralsBit
| TypeApplicationsBit
| StaticPointersBit
| NumericUnderscoresBit
deriving Enum
enabledExts :: [ExtBits]
enabledExts =
[ FfiBit
, InterruptibleFfiBit
, CApiFfiBit
, ParrBit
, ArrowsBit
, ThBit
, ThQuotesBit
, IpBit
, OverloadedLabelsBit
, ExplicitForallBit
, BangPatBit
, PatternSynonymsBit
, HaddockBit
, MagicHashBit
, RecursiveDoBit
, UnicodeSyntaxBit
, UnboxedTuplesBit
, UnboxedSumsBit
, DatatypeContextsBit
, TransformComprehensionsBit
, QqBit
, InRulePragBit
, RawTokenStreamBit
, SafeHaskellBit
, LambdaCaseBit
, BinaryLiteralsBit
, NegativeLiteralsBit
, HexFloatLiteralsBit
, TypeApplicationsBit
, StaticPointersBit
, NumericUnderscoresBit
]