{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
{-# OPTIONS_GHC -fno-warn-missing-fields #-}
module GHC.SyntaxHighlighter
( Token (..)
, Loc (..)
, tokenizeHaskell
, tokenizeHaskellLoc )
where
import Control.Monad
import Data.List (foldl', unfoldr)
import Data.Maybe (isJust)
import Data.Text (Text)
import DynFlags
import FastString (mkFastString)
import Fingerprint (fingerprint0)
import GHC.LanguageExtensions
import GHC.Version (cProjectVersion)
import SrcLoc
import StringBuffer
import ToolSettings
import qualified Data.Text as T
import qualified EnumSet as ES
import qualified Lexer as L
data Token
= KeywordTok
| PragmaTok
| SymbolTok
| VariableTok
| ConstructorTok
| OperatorTok
| CharTok
| StringTok
| IntegerTok
| RationalTok
| CommentTok
| SpaceTok
| OtherTok
deriving (Eq, Ord, Enum, Bounded, Show)
data Loc = Loc !Int !Int !Int !Int
deriving (Eq, Ord, Show)
tokenizeHaskell :: Text -> Maybe [(Token, Text)]
tokenizeHaskell input = sliceInputStream input <$> tokenizeHaskellLoc input
sliceInputStream :: Text -> [(Token, Loc)] -> [(Token, Text)]
sliceInputStream input toks = unfoldr sliceOnce (initText' input, toks)
where
sliceOnce (txt, []) = do
(txt', chunk) <- tryFetchRest txt
return ((SpaceTok, chunk), (txt', []))
sliceOnce (txt, tss@((t, l):ts)) =
case tryFetchSpace txt l of
Nothing ->
let (txt', chunk) = fetchSpan txt l
t' = case t of
CommentTok -> if isHeaderPragma chunk
then PragmaTok
else CommentTok
tok -> tok
in Just ((t', chunk), (txt', ts))
Just (txt', chunk) ->
Just ((SpaceTok, chunk), (txt', tss))
tokenizeHaskellLoc :: Text -> Maybe [(Token, Loc)]
tokenizeHaskellLoc input =
case L.unP pLexer parseState of
L.PFailed {} -> Nothing
L.POk _ x -> Just x
where
location = mkRealSrcLoc (mkFastString "") 1 1
buffer = stringToStringBuffer (T.unpack input)
parseState = L.mkPStatePure parserFlags buffer location
parserFlags = L.mkParserFlags (foldl' xopt_set initialDynFlags enabledExts)
initialDynFlags = DynFlags
{ warningFlags = ES.empty
, generalFlags = ES.fromList
[ Opt_Haddock
, Opt_KeepRawTokenStream
]
, extensions = []
, extensionFlags = ES.empty
, safeHaskell = Sf_Safe
, language = Just Haskell2010
, ghcNameVersion = GhcNameVersion
{ ghcNameVersion_programName = "ghc"
, ghcNameVersion_projectVersion = cProjectVersion
}
, fileSettings = FileSettings {}
, toolSettings = ToolSettings
{ toolSettings_opt_P_fingerprint = fingerprint0
, toolSettings_pgm_F = ""
}
, platformMisc = PlatformMisc {}
}
pLexer :: L.P [(Token, Loc)]
pLexer = go
where
go = do
r <- L.lexer False return
case r of
L _ L.ITeof -> return []
_ ->
case fixupToken r of
Nothing -> go
Just x -> (x:) <$> go
fixupToken :: Located L.Token -> Maybe (Token, Loc)
fixupToken (L srcSpan tok) = (classifyToken tok,) <$> srcSpanToLoc srcSpan
srcSpanToLoc :: SrcSpan -> Maybe Loc
srcSpanToLoc (RealSrcSpan rss) =
let start = realSrcSpanStart rss
end = realSrcSpanEnd rss
in if start == end
then Nothing
else Just $ Loc (srcLocLine start)
(srcLocCol start)
(srcLocLine end)
(srcLocCol end)
srcSpanToLoc _ = Nothing
classifyToken :: L.Token -> Token
classifyToken = \case
L.ITas -> KeywordTok
L.ITcase -> KeywordTok
L.ITclass -> KeywordTok
L.ITdata -> KeywordTok
L.ITdefault -> KeywordTok
L.ITderiving -> KeywordTok
L.ITdo -> KeywordTok
L.ITelse -> KeywordTok
L.IThiding -> KeywordTok
L.ITforeign -> KeywordTok
L.ITif -> KeywordTok
L.ITimport -> KeywordTok
L.ITin -> KeywordTok
L.ITinfix -> KeywordTok
L.ITinfixl -> KeywordTok
L.ITinfixr -> KeywordTok
L.ITinstance -> KeywordTok
L.ITlet -> KeywordTok
L.ITmodule -> KeywordTok
L.ITnewtype -> KeywordTok
L.ITof -> KeywordTok
L.ITqualified -> KeywordTok
L.ITthen -> KeywordTok
L.ITtype -> KeywordTok
L.ITwhere -> KeywordTok
L.ITforall _ -> KeywordTok
L.ITexport -> KeywordTok
L.ITlabel -> KeywordTok
L.ITdynamic -> KeywordTok
L.ITsafe -> KeywordTok
L.ITinterruptible -> KeywordTok
L.ITunsafe -> KeywordTok
L.ITstdcallconv -> KeywordTok
L.ITccallconv -> KeywordTok
L.ITcapiconv -> KeywordTok
L.ITprimcallconv -> KeywordTok
L.ITjavascriptcallconv -> KeywordTok
L.ITmdo -> KeywordTok
L.ITfamily -> KeywordTok
L.ITrole -> KeywordTok
L.ITgroup -> KeywordTok
L.ITby -> KeywordTok
L.ITusing -> KeywordTok
L.ITpattern -> KeywordTok
L.ITstatic -> KeywordTok
L.ITstock -> KeywordTok
L.ITanyclass -> KeywordTok
L.ITvia -> KeywordTok
L.ITunit -> KeywordTok
L.ITsignature -> KeywordTok
L.ITdependency -> KeywordTok
L.ITrequires -> KeywordTok
L.ITinline_prag {} -> PragmaTok
L.ITspec_prag _ -> PragmaTok
L.ITspec_inline_prag {} -> PragmaTok
L.ITsource_prag _ -> PragmaTok
L.ITrules_prag _ -> PragmaTok
L.ITwarning_prag _ -> PragmaTok
L.ITdeprecated_prag _ -> PragmaTok
L.ITline_prag _ -> PragmaTok
L.ITcolumn_prag _ -> PragmaTok
L.ITscc_prag _ -> PragmaTok
L.ITgenerated_prag _ -> PragmaTok
L.ITcore_prag _ -> PragmaTok
L.ITunpack_prag _ -> PragmaTok
L.ITnounpack_prag _ -> PragmaTok
L.ITann_prag _ -> PragmaTok
L.ITcomplete_prag _ -> PragmaTok
L.ITclose_prag -> PragmaTok
L.IToptions_prag _ -> PragmaTok
L.ITinclude_prag _ -> PragmaTok
L.ITlanguage_prag -> PragmaTok
L.ITminimal_prag _ -> PragmaTok
L.IToverlappable_prag _ -> PragmaTok
L.IToverlapping_prag _ -> PragmaTok
L.IToverlaps_prag _ -> PragmaTok
L.ITincoherent_prag _ -> PragmaTok
L.ITctype _ -> PragmaTok
L.ITcomment_line_prag -> PragmaTok
L.ITdotdot -> SymbolTok
L.ITcolon -> SymbolTok
L.ITdcolon _ -> SymbolTok
L.ITequal -> SymbolTok
L.ITlam -> SymbolTok
L.ITlcase -> SymbolTok
L.ITvbar -> SymbolTok
L.ITlarrow _ -> SymbolTok
L.ITrarrow _ -> SymbolTok
L.ITat -> SymbolTok
L.ITtilde -> SymbolTok
L.ITdarrow _ -> SymbolTok
L.ITbang -> SymbolTok
L.ITstar _ -> SymbolTok
L.ITbiglam -> SymbolTok
L.ITocurly -> SymbolTok
L.ITccurly -> SymbolTok
L.ITvocurly -> SymbolTok
L.ITvccurly -> SymbolTok
L.ITobrack -> SymbolTok
L.ITopabrack -> SymbolTok
L.ITcpabrack -> SymbolTok
L.ITcbrack -> SymbolTok
L.IToparen -> SymbolTok
L.ITcparen -> SymbolTok
L.IToubxparen -> SymbolTok
L.ITcubxparen -> SymbolTok
L.ITsemi -> SymbolTok
L.ITcomma -> SymbolTok
L.ITunderscore -> SymbolTok
L.ITbackquote -> SymbolTok
L.ITsimpleQuote -> SymbolTok
L.ITminus -> OperatorTok
L.ITdot -> OperatorTok
L.ITvarid _ -> VariableTok
L.ITconid _ -> ConstructorTok
L.ITvarsym _ -> OperatorTok
L.ITconsym _ -> OperatorTok
L.ITqvarid _ -> VariableTok
L.ITqconid _ -> ConstructorTok
L.ITqvarsym _ -> OperatorTok
L.ITqconsym _ -> OperatorTok
L.ITdupipvarid _ -> VariableTok
L.ITlabelvarid _ -> VariableTok
L.ITchar _ _ -> CharTok
L.ITstring _ _ -> StringTok
L.ITinteger _ -> IntegerTok
L.ITrational _ -> RationalTok
L.ITprimchar _ _ -> CharTok
L.ITprimstring _ _ -> StringTok
L.ITprimint _ _ -> IntegerTok
L.ITprimword _ _ -> IntegerTok
L.ITprimfloat _ -> RationalTok
L.ITprimdouble _ -> RationalTok
L.ITopenExpQuote _ _ -> SymbolTok
L.ITopenPatQuote -> SymbolTok
L.ITopenDecQuote -> SymbolTok
L.ITopenTypQuote -> SymbolTok
L.ITcloseQuote _ -> SymbolTok
L.ITopenTExpQuote _ -> SymbolTok
L.ITcloseTExpQuote -> SymbolTok
L.ITidEscape _ -> SymbolTok
L.ITparenEscape -> SymbolTok
L.ITidTyEscape _ -> SymbolTok
L.ITparenTyEscape -> SymbolTok
L.ITtyQuote -> SymbolTok
L.ITquasiQuote _ -> SymbolTok
L.ITqQuasiQuote _ -> SymbolTok
L.ITproc -> KeywordTok
L.ITrec -> KeywordTok
L.IToparenbar _ -> SymbolTok
L.ITcparenbar _ -> SymbolTok
L.ITlarrowtail _ -> SymbolTok
L.ITrarrowtail _ -> SymbolTok
L.ITLarrowtail _ -> SymbolTok
L.ITRarrowtail _ -> SymbolTok
L.ITtypeApp -> SymbolTok
L.ITunknown _ -> OtherTok
L.ITeof -> OtherTok
L.ITdocCommentNext _ -> CommentTok
L.ITdocCommentPrev _ -> CommentTok
L.ITdocCommentNamed _ -> CommentTok
L.ITdocSection _ _ -> CommentTok
L.ITdocOptions _ -> CommentTok
L.ITlineComment _ -> CommentTok
L.ITblockComment _ -> CommentTok
data Text' = Text'
{-# UNPACK #-} !Int
{-# UNPACK #-} !Int
{-# UNPACK #-} !Text
deriving (Show)
initText' :: Text -> Text'
initText' = Text' 1 1
tryFetchSpace :: Text' -> Loc -> Maybe (Text', Text)
tryFetchSpace txt (Loc sl sc _ _) =
let (txt', r) = reachLoc txt sl sc
in if T.null r
then Nothing
else Just (txt', r)
tryFetchRest :: Text' -> Maybe (Text', Text)
tryFetchRest (Text' l c txt) =
if T.null txt
then Nothing
else Just (Text' l c "", txt)
fetchSpan :: Text' -> Loc -> (Text', Text)
fetchSpan txt (Loc _ _ el ec) = reachLoc txt el ec
reachLoc
:: Text'
-> Int
-> Int
-> (Text', Text)
reachLoc txt@(Text' _ _ original) l c =
let chunk = T.unfoldr f txt
f (Text' l' c' s) = do
guard (l' < l || c' < c)
(ch, s') <- T.uncons s
let (l'', c'') = case ch of
'\n' -> (l' + 1, 1)
'\t' -> (l', c' + 8 - ((c' - 1) `rem` 8))
_ -> (l', c' + 1)
return (ch, Text' l'' c'' s')
in (Text' l c (T.drop (T.length chunk) original), chunk)
isHeaderPragma :: Text -> Bool
isHeaderPragma txt0 = isJust $ do
txt1 <- T.stripStart <$> T.stripPrefix "{-#" txt0
guard (T.isPrefixOf "LANGUAGE" txt1 || T.isPrefixOf "OPTIONS_GHC" txt1)
enabledExts :: [Extension]
enabledExts =
[ ForeignFunctionInterface
, InterruptibleFFI
, CApiFFI
, Arrows
, TemplateHaskell
, TemplateHaskellQuotes
, ImplicitParams
, OverloadedLabels
, ExplicitForAll
, BangPatterns
, PatternSynonyms
, MagicHash
, RecursiveDo
, UnicodeSyntax
, UnboxedTuples
, UnboxedSums
, DatatypeContexts
, TransformListComp
, QuasiQuotes
, LambdaCase
, BinaryLiterals
, NegativeLiterals
, HexFloatLiterals
, TypeApplications
, StaticPointers
, NumericUnderscores
, StarIsType
]