{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# OPTIONS_GHC -fno-warn-missing-fields #-}

-- |
-- Module      :  GHC.SyntaxHighlighter
-- Copyright   :  © 2018–present Mark Karpov
-- License     :  BSD 3 clause
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- The module allows you to decompose a 'Text' stream containing Haskell
-- source code into a stream of 'Text' chunks tagged with 'Token'.
--
-- This library uses GHC's lexer, so the result is guaranteed to be 100%
-- correct, as if it was parsed by GHC itself.
module GHC.SyntaxHighlighter
  ( Token (..),
    Loc (..),
    tokenizeHaskell,
    tokenizeHaskellLoc,
  )
where

import Control.Monad
import Data.List (foldl', unfoldr)
import Data.Maybe (isJust)
import Data.Text (Text)
import qualified Data.Text as T
import qualified GHC.Data.EnumSet as ES
import GHC.Data.FastString (mkFastString)
import GHC.Data.StringBuffer
import GHC.Driver.Session as DynFlags
import GHC.LanguageExtensions
import qualified GHC.Parser.Lexer as L
import GHC.Settings
import GHC.Types.SrcLoc
import GHC.Utils.Fingerprint (fingerprint0)
import GHC.Version (cProjectVersion)

----------------------------------------------------------------------------
-- Data types

-- | Token types that are used as tags to mark spans of source code.
data Token
  = -- | Keyword
    KeywordTok
  | -- | Pragmas
    PragmaTok
  | -- | Symbols (punctuation that is not an operator)
    SymbolTok
  | -- | Variable name (term level)
    VariableTok
  | -- | Data\/type constructor
    ConstructorTok
  | -- | Operator
    OperatorTok
  | -- | Character
    CharTok
  | -- | String
    StringTok
  | -- | Integer
    IntegerTok
  | -- | Rational number
    RationalTok
  | -- | Comment (including Haddocks)
    CommentTok
  | -- | Space filling
    SpaceTok
  | -- | Something else?
    OtherTok
  deriving (Token -> Token -> Bool
(Token -> Token -> Bool) -> (Token -> Token -> Bool) -> Eq Token
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Token -> Token -> Bool
$c/= :: Token -> Token -> Bool
== :: Token -> Token -> Bool
$c== :: Token -> Token -> Bool
Eq, Eq Token
Eq Token
-> (Token -> Token -> Ordering)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Bool)
-> (Token -> Token -> Token)
-> (Token -> Token -> Token)
-> Ord Token
Token -> Token -> Bool
Token -> Token -> Ordering
Token -> Token -> Token
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Token -> Token -> Token
$cmin :: Token -> Token -> Token
max :: Token -> Token -> Token
$cmax :: Token -> Token -> Token
>= :: Token -> Token -> Bool
$c>= :: Token -> Token -> Bool
> :: Token -> Token -> Bool
$c> :: Token -> Token -> Bool
<= :: Token -> Token -> Bool
$c<= :: Token -> Token -> Bool
< :: Token -> Token -> Bool
$c< :: Token -> Token -> Bool
compare :: Token -> Token -> Ordering
$ccompare :: Token -> Token -> Ordering
$cp1Ord :: Eq Token
Ord, Int -> Token
Token -> Int
Token -> [Token]
Token -> Token
Token -> Token -> [Token]
Token -> Token -> Token -> [Token]
(Token -> Token)
-> (Token -> Token)
-> (Int -> Token)
-> (Token -> Int)
-> (Token -> [Token])
-> (Token -> Token -> [Token])
-> (Token -> Token -> [Token])
-> (Token -> Token -> Token -> [Token])
-> Enum Token
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: Token -> Token -> Token -> [Token]
$cenumFromThenTo :: Token -> Token -> Token -> [Token]
enumFromTo :: Token -> Token -> [Token]
$cenumFromTo :: Token -> Token -> [Token]
enumFromThen :: Token -> Token -> [Token]
$cenumFromThen :: Token -> Token -> [Token]
enumFrom :: Token -> [Token]
$cenumFrom :: Token -> [Token]
fromEnum :: Token -> Int
$cfromEnum :: Token -> Int
toEnum :: Int -> Token
$ctoEnum :: Int -> Token
pred :: Token -> Token
$cpred :: Token -> Token
succ :: Token -> Token
$csucc :: Token -> Token
Enum, Token
Token -> Token -> Bounded Token
forall a. a -> a -> Bounded a
maxBound :: Token
$cmaxBound :: Token
minBound :: Token
$cminBound :: Token
Bounded, Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
(Int -> Token -> ShowS)
-> (Token -> String) -> ([Token] -> ShowS) -> Show Token
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show)

-- | Start and end positions of a span. The arguments of the data
-- constructor contain in order:
--
--     * Line number of start position of a span
--     * Column number of start position of a span
--     * Line number of end position of a span
--     * Column number of end position of a span
--
-- @since 0.0.2.0
data Loc = Loc !Int !Int !Int !Int
  deriving (Loc -> Loc -> Bool
(Loc -> Loc -> Bool) -> (Loc -> Loc -> Bool) -> Eq Loc
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Loc -> Loc -> Bool
$c/= :: Loc -> Loc -> Bool
== :: Loc -> Loc -> Bool
$c== :: Loc -> Loc -> Bool
Eq, Eq Loc
Eq Loc
-> (Loc -> Loc -> Ordering)
-> (Loc -> Loc -> Bool)
-> (Loc -> Loc -> Bool)
-> (Loc -> Loc -> Bool)
-> (Loc -> Loc -> Bool)
-> (Loc -> Loc -> Loc)
-> (Loc -> Loc -> Loc)
-> Ord Loc
Loc -> Loc -> Bool
Loc -> Loc -> Ordering
Loc -> Loc -> Loc
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Loc -> Loc -> Loc
$cmin :: Loc -> Loc -> Loc
max :: Loc -> Loc -> Loc
$cmax :: Loc -> Loc -> Loc
>= :: Loc -> Loc -> Bool
$c>= :: Loc -> Loc -> Bool
> :: Loc -> Loc -> Bool
$c> :: Loc -> Loc -> Bool
<= :: Loc -> Loc -> Bool
$c<= :: Loc -> Loc -> Bool
< :: Loc -> Loc -> Bool
$c< :: Loc -> Loc -> Bool
compare :: Loc -> Loc -> Ordering
$ccompare :: Loc -> Loc -> Ordering
$cp1Ord :: Eq Loc
Ord, Int -> Loc -> ShowS
[Loc] -> ShowS
Loc -> String
(Int -> Loc -> ShowS)
-> (Loc -> String) -> ([Loc] -> ShowS) -> Show Loc
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Loc] -> ShowS
$cshowList :: [Loc] -> ShowS
show :: Loc -> String
$cshow :: Loc -> String
showsPrec :: Int -> Loc -> ShowS
$cshowsPrec :: Int -> Loc -> ShowS
Show)

----------------------------------------------------------------------------
-- High-level API

-- | Tokenize Haskell source code. If the code cannot be parsed, return
-- 'Nothing'. Otherwise return the original input tagged by 'Token's.
-- 'Nothing' is rarely returned if ever, because it looks like the lexer is
-- capable of interpreting almost any text as a stream of GHC tokens.
--
-- The parser does not require the input source code to form a valid Haskell
-- program, so as long as the lexer can decompose your input (most of the
-- time), it'll return something in 'Just'.
tokenizeHaskell :: Text -> Maybe [(Token, Text)]
tokenizeHaskell :: Text -> Maybe [(Token, Text)]
tokenizeHaskell Text
input = Text -> [(Token, Loc)] -> [(Token, Text)]
sliceInputStream Text
input ([(Token, Loc)] -> [(Token, Text)])
-> Maybe [(Token, Loc)] -> Maybe [(Token, Text)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Maybe [(Token, Loc)]
tokenizeHaskellLoc Text
input

-- | Replace 'Loc' locations with actual chunks of input 'Text'.
sliceInputStream :: Text -> [(Token, Loc)] -> [(Token, Text)]
sliceInputStream :: Text -> [(Token, Loc)] -> [(Token, Text)]
sliceInputStream Text
input [(Token, Loc)]
toks = ((Text', [(Token, Loc)])
 -> Maybe ((Token, Text), (Text', [(Token, Loc)])))
-> (Text', [(Token, Loc)]) -> [(Token, Text)]
forall b a. (b -> Maybe (a, b)) -> b -> [a]
unfoldr (Text', [(Token, Loc)])
-> Maybe ((Token, Text), (Text', [(Token, Loc)]))
sliceOnce (Text -> Text'
initText' Text
input, [(Token, Loc)]
toks)
  where
    sliceOnce :: (Text', [(Token, Loc)])
-> Maybe ((Token, Text), (Text', [(Token, Loc)]))
sliceOnce (Text'
txt, []) = do
      (Text'
txt', Text
chunk) <- Text' -> Maybe (Text', Text)
tryFetchRest Text'
txt
      ((Token, Text), (Text', [(Token, Loc)]))
-> Maybe ((Token, Text), (Text', [(Token, Loc)]))
forall (m :: * -> *) a. Monad m => a -> m a
return ((Token
SpaceTok, Text
chunk), (Text'
txt', []))
    sliceOnce (Text'
txt, tss :: [(Token, Loc)]
tss@((Token
t, Loc
l) : [(Token, Loc)]
ts)) =
      case Text' -> Loc -> Maybe (Text', Text)
tryFetchSpace Text'
txt Loc
l of
        Maybe (Text', Text)
Nothing ->
          let (Text'
txt', Text
chunk) = Text' -> Loc -> (Text', Text)
fetchSpan Text'
txt Loc
l
              t' :: Token
t' = case Token
t of
                Token
CommentTok ->
                  if Text -> Bool
isHeaderPragma Text
chunk
                    then Token
PragmaTok
                    else Token
CommentTok
                Token
tok -> Token
tok
           in ((Token, Text), (Text', [(Token, Loc)]))
-> Maybe ((Token, Text), (Text', [(Token, Loc)]))
forall a. a -> Maybe a
Just ((Token
t', Text
chunk), (Text'
txt', [(Token, Loc)]
ts))
        Just (Text'
txt', Text
chunk) ->
          ((Token, Text), (Text', [(Token, Loc)]))
-> Maybe ((Token, Text), (Text', [(Token, Loc)]))
forall a. a -> Maybe a
Just ((Token
SpaceTok, Text
chunk), (Text'
txt', [(Token, Loc)]
tss))

-- | Similar to 'tokenizeHaskell', but instead of 'Text' chunks provides
-- locations of corresponding spans in the given input stream.
--
-- @since 0.0.2.0
tokenizeHaskellLoc :: Text -> Maybe [(Token, Loc)]
tokenizeHaskellLoc :: Text -> Maybe [(Token, Loc)]
tokenizeHaskellLoc Text
input =
  case P [(Token, Loc)] -> PState -> ParseResult [(Token, Loc)]
forall a. P a -> PState -> ParseResult a
L.unP P [(Token, Loc)]
pLexer PState
parseState of
    L.PFailed {} -> Maybe [(Token, Loc)]
forall a. Maybe a
Nothing
    L.POk PState
_ [(Token, Loc)]
x -> [(Token, Loc)] -> Maybe [(Token, Loc)]
forall a. a -> Maybe a
Just [(Token, Loc)]
x
  where
    location :: RealSrcLoc
location = FastString -> Int -> Int -> RealSrcLoc
mkRealSrcLoc (String -> FastString
mkFastString String
"") Int
1 Int
1
    buffer :: StringBuffer
buffer = String -> StringBuffer
stringToStringBuffer (Text -> String
T.unpack Text
input)
    parseState :: PState
parseState = ParserFlags -> StringBuffer -> RealSrcLoc -> PState
L.mkPStatePure ParserFlags
parserFlags StringBuffer
buffer RealSrcLoc
location
    parserFlags :: ParserFlags
parserFlags = DynFlags -> ParserFlags
L.mkParserFlags ((DynFlags -> Extension -> DynFlags)
-> DynFlags -> [Extension] -> DynFlags
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' DynFlags -> Extension -> DynFlags
xopt_set DynFlags
initialDynFlags [Extension]
enabledExts)
    initialDynFlags :: DynFlags
initialDynFlags =
      DynFlags :: GhcMode
-> GhcLink
-> HscTarget
-> GhcNameVersion
-> FileSettings
-> Platform
-> ToolSettings
-> PlatformMisc
-> PlatformConstants
-> [(String, String)]
-> LlvmConfig
-> Int
-> Int
-> Int
-> Int
-> Int
-> Maybe String
-> Maybe String
-> [Int]
-> Maybe Int
-> Bool
-> Maybe Int
-> Maybe Int
-> Maybe Int
-> Maybe Int
-> Maybe Int
-> Int
-> Int
-> Int
-> Maybe Int
-> Maybe Int
-> Int
-> Word
-> Maybe Int
-> Maybe Int
-> Maybe Int
-> Maybe Int
-> Bool
-> Maybe Int
-> Int
-> [String]
-> Module
-> Maybe String
-> IntWithInf
-> IntWithInf
-> UnitId
-> Maybe IndefUnitId
-> [(ModuleName, Module)]
-> Set Way
-> Maybe (String, Int)
-> Maybe String
-> Maybe String
-> Maybe String
-> Maybe String
-> Maybe String
-> Maybe String
-> String
-> String
-> String
-> String
-> IORef Bool
-> String
-> String
-> Maybe String
-> Maybe String
-> Maybe String
-> DynLibLoader
-> Maybe String
-> Maybe String
-> [Option]
-> IncludeSpecs
-> [String]
-> [String]
-> [String]
-> Maybe String
-> RtsOptsEnabled
-> Bool
-> String
-> [ModuleName]
-> [(ModuleName, String)]
-> [String]
-> [LoadedPlugin]
-> [StaticPlugin]
-> Hooks
-> String
-> Bool
-> Bool
-> [ModuleName]
-> [String]
-> [PackageDBFlag]
-> [IgnorePackageFlag]
-> [PackageFlag]
-> [PackageFlag]
-> [TrustFlag]
-> Maybe String
-> Maybe [UnitDatabase UnitId]
-> UnitState
-> IORef FilesToClean
-> IORef (Map String String)
-> IORef Int
-> IORef (Set String)
-> EnumSet DumpFlag
-> EnumSet GeneralFlag
-> EnumSet WarningFlag
-> EnumSet WarningFlag
-> Maybe Language
-> SafeHaskellMode
-> Bool
-> Bool
-> SrcSpan
-> SrcSpan
-> SrcSpan
-> SrcSpan
-> SrcSpan
-> SrcSpan
-> SrcSpan
-> SrcSpan
-> [OnOff Extension]
-> EnumSet Extension
-> Int
-> Int
-> Int
-> Int
-> Int
-> Bool
-> Int
-> Int
-> LogAction
-> DumpAction
-> TraceAction
-> FlushOut
-> FlushErr
-> Maybe String
-> Maybe String
-> [String]
-> Int
-> Int
-> Bool
-> OverridingBool
-> Bool
-> Scheme
-> ProfAuto
-> Maybe String
-> IORef (ModuleEnv Int)
-> Maybe SseVersion
-> Maybe BmiVersion
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> IORef (Maybe LinkerInfo)
-> IORef (Maybe CompilerInfo)
-> Int
-> Int
-> Int
-> Bool
-> Maybe Int
-> Int
-> Int
-> CfgWeights
-> DynFlags
DynFlags
        { warningFlags :: EnumSet WarningFlag
warningFlags = EnumSet WarningFlag
forall a. EnumSet a
ES.empty,
          generalFlags :: EnumSet GeneralFlag
generalFlags =
            [GeneralFlag] -> EnumSet GeneralFlag
forall a. Enum a => [a] -> EnumSet a
ES.fromList
              [ GeneralFlag
Opt_Haddock,
                GeneralFlag
Opt_KeepRawTokenStream
              ],
          extensions :: [OnOff Extension]
extensions = [],
          extensionFlags :: EnumSet Extension
extensionFlags = EnumSet Extension
forall a. EnumSet a
ES.empty,
          safeHaskell :: SafeHaskellMode
safeHaskell = SafeHaskellMode
Sf_Safe,
          language :: Maybe Language
language = Language -> Maybe Language
forall a. a -> Maybe a
Just Language
Haskell2010,
          ghcNameVersion :: GhcNameVersion
ghcNameVersion =
            GhcNameVersion :: String -> String -> GhcNameVersion
GhcNameVersion
              { ghcNameVersion_programName :: String
ghcNameVersion_programName = String
"ghc",
                ghcNameVersion_projectVersion :: String
ghcNameVersion_projectVersion = String
cProjectVersion
              },
          fileSettings :: FileSettings
fileSettings = FileSettings :: String
-> String
-> Maybe String
-> String
-> String
-> String
-> FileSettings
FileSettings {},
          toolSettings :: ToolSettings
toolSettings =
            ToolSettings :: Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> String
-> (String, [Option])
-> String
-> String
-> (String, [Option])
-> (String, [Option])
-> (String, [Option])
-> (String, [Option])
-> String
-> String
-> String
-> String
-> String
-> String
-> String
-> (String, [Option])
-> (String, [Option])
-> (String, [Option])
-> String
-> [String]
-> [String]
-> Fingerprint
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> [String]
-> ToolSettings
ToolSettings
              { toolSettings_opt_P_fingerprint :: Fingerprint
toolSettings_opt_P_fingerprint = Fingerprint
fingerprint0,
                toolSettings_pgm_F :: String
toolSettings_pgm_F = String
""
              },
          platformMisc :: PlatformMisc
platformMisc = PlatformMisc :: String
-> Bool
-> Bool
-> String
-> Bool
-> Bool
-> Bool
-> Bool
-> String
-> PlatformMisc
PlatformMisc {}
        }

-- | Haskell lexer.
pLexer :: L.P [(Token, Loc)]
pLexer :: P [(Token, Loc)]
pLexer = P [(Token, Loc)]
go
  where
    go :: P [(Token, Loc)]
go = do
      Located Token
r <- Bool -> (Located Token -> P (Located Token)) -> P (Located Token)
forall a. Bool -> (Located Token -> P a) -> P a
L.lexer Bool
False Located Token -> P (Located Token)
forall (m :: * -> *) a. Monad m => a -> m a
return
      case Located Token
r of
        L SrcSpan
_ Token
L.ITeof -> [(Token, Loc)] -> P [(Token, Loc)]
forall (m :: * -> *) a. Monad m => a -> m a
return []
        Located Token
_ ->
          case Located Token -> Maybe (Token, Loc)
fixupToken Located Token
r of
            Maybe (Token, Loc)
Nothing -> P [(Token, Loc)]
go
            Just (Token, Loc)
x -> ((Token, Loc)
x (Token, Loc) -> [(Token, Loc)] -> [(Token, Loc)]
forall a. a -> [a] -> [a]
:) ([(Token, Loc)] -> [(Token, Loc)])
-> P [(Token, Loc)] -> P [(Token, Loc)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> P [(Token, Loc)]
go

-- | Convert @'Located' 'L.Token'@ representation to a more convenient for
-- us form.
fixupToken :: Located L.Token -> Maybe (Token, Loc)
fixupToken :: Located Token -> Maybe (Token, Loc)
fixupToken (L SrcSpan
srcSpan Token
tok) = (Token -> Token
classifyToken Token
tok,) (Loc -> (Token, Loc)) -> Maybe Loc -> Maybe (Token, Loc)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> SrcSpan -> Maybe Loc
srcSpanToLoc SrcSpan
srcSpan

-- | Convert 'SrcSpan' to 'Loc'.
srcSpanToLoc :: SrcSpan -> Maybe Loc
srcSpanToLoc :: SrcSpan -> Maybe Loc
srcSpanToLoc (RealSrcSpan RealSrcSpan
s Maybe BufSpan
_) =
  let srcSpanSLine :: Int
srcSpanSLine = RealSrcSpan -> Int
srcSpanStartLine RealSrcSpan
s
      srcSpanSCol :: Int
srcSpanSCol = RealSrcSpan -> Int
srcSpanStartCol RealSrcSpan
s
      srcSpanELine :: Int
srcSpanELine = RealSrcSpan -> Int
srcSpanEndLine RealSrcSpan
s
      srcSpanECol :: Int
srcSpanECol = RealSrcSpan -> Int
srcSpanEndCol RealSrcSpan
s
      start :: (Int, Int)
start = (Int
srcSpanSLine, Int
srcSpanSCol)
      end :: (Int, Int)
end = (Int
srcSpanELine, Int
srcSpanECol)
   in if (Int, Int)
start (Int, Int) -> (Int, Int) -> Bool
forall a. Eq a => a -> a -> Bool
== (Int, Int)
end
        then Maybe Loc
forall a. Maybe a
Nothing -- NOTE Some magic auto-generated tokens that do not
        -- actually appear in the input stream. Drop them.
        else
          Loc -> Maybe Loc
forall a. a -> Maybe a
Just (Loc -> Maybe Loc) -> Loc -> Maybe Loc
forall a b. (a -> b) -> a -> b
$
            Int -> Int -> Int -> Int -> Loc
Loc
              Int
srcSpanSLine
              Int
srcSpanSCol
              Int
srcSpanELine
              Int
srcSpanECol
srcSpanToLoc SrcSpan
_ = Maybe Loc
forall a. Maybe a
Nothing

-- | Classify a 'L.Token' in terms of 'Token'.
classifyToken :: L.Token -> Token
classifyToken :: Token -> Token
classifyToken = \case
  -- Keywords
  Token
L.ITas -> Token
KeywordTok
  Token
L.ITcase -> Token
KeywordTok
  Token
L.ITclass -> Token
KeywordTok
  Token
L.ITdata -> Token
KeywordTok
  Token
L.ITdefault -> Token
KeywordTok
  Token
L.ITderiving -> Token
KeywordTok
  L.ITdo Maybe FastString
_ -> Token
KeywordTok
  Token
L.ITelse -> Token
KeywordTok
  Token
L.IThiding -> Token
KeywordTok
  Token
L.ITforeign -> Token
KeywordTok
  Token
L.ITif -> Token
KeywordTok
  Token
L.ITimport -> Token
KeywordTok
  Token
L.ITin -> Token
KeywordTok
  Token
L.ITinfix -> Token
KeywordTok
  Token
L.ITinfixl -> Token
KeywordTok
  Token
L.ITinfixr -> Token
KeywordTok
  Token
L.ITinstance -> Token
KeywordTok
  Token
L.ITlet -> Token
KeywordTok
  Token
L.ITmodule -> Token
KeywordTok
  Token
L.ITnewtype -> Token
KeywordTok
  Token
L.ITof -> Token
KeywordTok
  Token
L.ITqualified -> Token
KeywordTok
  Token
L.ITthen -> Token
KeywordTok
  Token
L.ITtype -> Token
KeywordTok
  Token
L.ITwhere -> Token
KeywordTok
  L.ITforall IsUnicodeSyntax
_ -> Token
KeywordTok
  Token
L.ITexport -> Token
KeywordTok
  Token
L.ITlabel -> Token
KeywordTok
  Token
L.ITdynamic -> Token
KeywordTok
  Token
L.ITsafe -> Token
KeywordTok
  Token
L.ITinterruptible -> Token
KeywordTok
  Token
L.ITunsafe -> Token
KeywordTok
  Token
L.ITstdcallconv -> Token
KeywordTok
  Token
L.ITccallconv -> Token
KeywordTok
  Token
L.ITcapiconv -> Token
KeywordTok
  Token
L.ITprimcallconv -> Token
KeywordTok
  Token
L.ITjavascriptcallconv -> Token
KeywordTok
  L.ITmdo Maybe FastString
_ -> Token
KeywordTok
  Token
L.ITfamily -> Token
KeywordTok
  Token
L.ITrole -> Token
KeywordTok
  Token
L.ITgroup -> Token
KeywordTok
  Token
L.ITby -> Token
KeywordTok
  Token
L.ITusing -> Token
KeywordTok
  Token
L.ITpattern -> Token
KeywordTok
  Token
L.ITstatic -> Token
KeywordTok
  Token
L.ITstock -> Token
KeywordTok
  Token
L.ITanyclass -> Token
KeywordTok
  Token
L.ITvia -> Token
KeywordTok
  Token
L.ITunit -> Token
KeywordTok
  Token
L.ITsignature -> Token
KeywordTok
  Token
L.ITdependency -> Token
KeywordTok
  Token
L.ITrequires -> Token
KeywordTok
  -- Pragmas
  L.ITinline_prag {} -> Token
PragmaTok
  L.ITspec_prag SourceText
_ -> Token
PragmaTok
  L.ITspec_inline_prag {} -> Token
PragmaTok
  L.ITsource_prag SourceText
_ -> Token
PragmaTok
  L.ITrules_prag SourceText
_ -> Token
PragmaTok
  L.ITwarning_prag SourceText
_ -> Token
PragmaTok
  L.ITdeprecated_prag SourceText
_ -> Token
PragmaTok
  L.ITline_prag SourceText
_ -> Token
PragmaTok
  L.ITcolumn_prag SourceText
_ -> Token
PragmaTok
  L.ITscc_prag SourceText
_ -> Token
PragmaTok
  L.ITgenerated_prag SourceText
_ -> Token
PragmaTok
  L.ITunpack_prag SourceText
_ -> Token
PragmaTok
  L.ITnounpack_prag SourceText
_ -> Token
PragmaTok
  L.ITann_prag SourceText
_ -> Token
PragmaTok
  L.ITcomplete_prag SourceText
_ -> Token
PragmaTok
  Token
L.ITclose_prag -> Token
PragmaTok
  L.IToptions_prag String
_ -> Token
PragmaTok
  L.ITinclude_prag String
_ -> Token
PragmaTok
  Token
L.ITlanguage_prag -> Token
PragmaTok
  L.ITminimal_prag SourceText
_ -> Token
PragmaTok
  L.IToverlappable_prag SourceText
_ -> Token
PragmaTok
  L.IToverlapping_prag SourceText
_ -> Token
PragmaTok
  L.IToverlaps_prag SourceText
_ -> Token
PragmaTok
  L.ITincoherent_prag SourceText
_ -> Token
PragmaTok
  L.ITctype SourceText
_ -> Token
PragmaTok
  Token
L.ITcomment_line_prag -> Token
PragmaTok
  -- Reserved symbols
  Token
L.ITdotdot -> Token
SymbolTok
  Token
L.ITcolon -> Token
SymbolTok
  L.ITdcolon IsUnicodeSyntax
_ -> Token
SymbolTok
  Token
L.ITequal -> Token
SymbolTok
  Token
L.ITlam -> Token
SymbolTok
  Token
L.ITlcase -> Token
SymbolTok
  Token
L.ITvbar -> Token
SymbolTok
  L.ITlarrow IsUnicodeSyntax
_ -> Token
SymbolTok
  L.ITrarrow IsUnicodeSyntax
_ -> Token
SymbolTok
  Token
L.ITlolly -> Token
SymbolTok
  Token
L.ITat -> Token
SymbolTok
  Token
L.ITtilde -> Token
SymbolTok
  L.ITdarrow IsUnicodeSyntax
_ -> Token
SymbolTok
  Token
L.ITbang -> Token
SymbolTok
  L.ITstar IsUnicodeSyntax
_ -> Token
SymbolTok
  Token
L.ITbiglam -> Token
SymbolTok
  Token
L.ITocurly -> Token
SymbolTok
  Token
L.ITccurly -> Token
SymbolTok
  Token
L.ITvocurly -> Token
SymbolTok
  Token
L.ITvccurly -> Token
SymbolTok
  Token
L.ITobrack -> Token
SymbolTok
  Token
L.ITopabrack -> Token
SymbolTok
  Token
L.ITcpabrack -> Token
SymbolTok
  Token
L.ITcbrack -> Token
SymbolTok
  Token
L.IToparen -> Token
SymbolTok
  Token
L.ITcparen -> Token
SymbolTok
  Token
L.IToubxparen -> Token
SymbolTok
  Token
L.ITcubxparen -> Token
SymbolTok
  Token
L.ITsemi -> Token
SymbolTok
  Token
L.ITcomma -> Token
SymbolTok
  Token
L.ITunderscore -> Token
SymbolTok
  Token
L.ITbackquote -> Token
SymbolTok
  Token
L.ITsimpleQuote -> Token
SymbolTok
  Token
L.ITpercent -> Token
SymbolTok
  -- NOTE GHC thinks these are reserved symbols, but I classify them as
  -- operators.
  Token
L.ITminus -> Token
OperatorTok
  Token
L.ITprefixminus -> Token
OperatorTok
  Token
L.ITdot -> Token
OperatorTok
  -- Identifiers
  L.ITvarid FastString
_ -> Token
VariableTok
  L.ITconid FastString
_ -> Token
ConstructorTok
  L.ITvarsym FastString
_ -> Token
OperatorTok
  L.ITconsym FastString
_ -> Token
OperatorTok
  L.ITqvarid (FastString, FastString)
_ -> Token
VariableTok
  L.ITqconid (FastString, FastString)
_ -> Token
ConstructorTok
  L.ITqvarsym (FastString, FastString)
_ -> Token
OperatorTok
  L.ITqconsym (FastString, FastString)
_ -> Token
OperatorTok
  L.ITdupipvarid FastString
_ -> Token
VariableTok
  L.ITlabelvarid FastString
_ -> Token
VariableTok
  -- Basic types
  L.ITchar SourceText
_ Char
_ -> Token
CharTok
  L.ITstring SourceText
_ FastString
_ -> Token
StringTok
  L.ITinteger IntegralLit
_ -> Token
IntegerTok
  L.ITrational FractionalLit
_ -> Token
RationalTok
  L.ITprimchar SourceText
_ Char
_ -> Token
CharTok
  L.ITprimstring SourceText
_ ByteString
_ -> Token
StringTok
  L.ITprimint SourceText
_ Integer
_ -> Token
IntegerTok
  L.ITprimword SourceText
_ Integer
_ -> Token
IntegerTok
  L.ITprimfloat FractionalLit
_ -> Token
RationalTok
  L.ITprimdouble FractionalLit
_ -> Token
RationalTok
  -- Template Haskell extension tokens
  L.ITopenExpQuote HasE
_ IsUnicodeSyntax
_ -> Token
SymbolTok
  Token
L.ITopenPatQuote -> Token
SymbolTok
  Token
L.ITopenDecQuote -> Token
SymbolTok
  Token
L.ITopenTypQuote -> Token
SymbolTok
  L.ITcloseQuote IsUnicodeSyntax
_ -> Token
SymbolTok
  L.ITopenTExpQuote HasE
_ -> Token
SymbolTok
  Token
L.ITcloseTExpQuote -> Token
SymbolTok
  Token
L.ITtyQuote -> Token
SymbolTok
  L.ITquasiQuote (FastString, FastString, PsSpan)
_ -> Token
SymbolTok
  L.ITqQuasiQuote (FastString, FastString, FastString, PsSpan)
_ -> Token
SymbolTok
  Token
L.ITdollar -> Token
SymbolTok
  Token
L.ITdollardollar -> Token
SymbolTok
  -- Arrow notation
  Token
L.ITproc -> Token
KeywordTok
  Token
L.ITrec -> Token
KeywordTok
  L.IToparenbar IsUnicodeSyntax
_ -> Token
SymbolTok
  L.ITcparenbar IsUnicodeSyntax
_ -> Token
SymbolTok
  L.ITlarrowtail IsUnicodeSyntax
_ -> Token
SymbolTok
  L.ITrarrowtail IsUnicodeSyntax
_ -> Token
SymbolTok
  L.ITLarrowtail IsUnicodeSyntax
_ -> Token
SymbolTok
  L.ITRarrowtail IsUnicodeSyntax
_ -> Token
SymbolTok
  -- Type application
  Token
L.ITtypeApp -> Token
SymbolTok
  -- Special
  L.ITunknown String
_ -> Token
OtherTok
  Token
L.ITeof -> Token
OtherTok -- normally is not included in results
  -- Documentation annotations
  L.ITdocCommentNext String
_ -> Token
CommentTok
  L.ITdocCommentPrev String
_ -> Token
CommentTok
  L.ITdocCommentNamed String
_ -> Token
CommentTok
  L.ITdocSection Int
_ String
_ -> Token
CommentTok
  L.ITdocOptions String
_ -> Token
CommentTok
  L.ITlineComment String
_ -> Token
CommentTok
  L.ITblockComment String
_ -> Token
CommentTok

----------------------------------------------------------------------------
-- Text traversing

-- | A type for 'Text' with line\/column location attached.
data Text'
  = Text'
      {-# UNPACK #-} !Int
      {-# UNPACK #-} !Int
      {-# UNPACK #-} !Text
  deriving (Int -> Text' -> ShowS
[Text'] -> ShowS
Text' -> String
(Int -> Text' -> ShowS)
-> (Text' -> String) -> ([Text'] -> ShowS) -> Show Text'
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Text'] -> ShowS
$cshowList :: [Text'] -> ShowS
show :: Text' -> String
$cshow :: Text' -> String
showsPrec :: Int -> Text' -> ShowS
$cshowsPrec :: Int -> Text' -> ShowS
Show)

-- | Create 'Text'' from 'Text'.
initText' :: Text -> Text'
initText' :: Text -> Text'
initText' = Int -> Int -> Text -> Text'
Text' Int
1 Int
1

-- | Try to fetch white space before start of span at 'Loc'.
tryFetchSpace :: Text' -> Loc -> Maybe (Text', Text)
tryFetchSpace :: Text' -> Loc -> Maybe (Text', Text)
tryFetchSpace Text'
txt (Loc Int
sl Int
sc Int
_ Int
_) =
  let (Text'
txt', Text
r) = Text' -> Int -> Int -> (Text', Text)
reachLoc Text'
txt Int
sl Int
sc
   in if Text -> Bool
T.null Text
r
        then Maybe (Text', Text)
forall a. Maybe a
Nothing
        else (Text', Text) -> Maybe (Text', Text)
forall a. a -> Maybe a
Just (Text'
txt', Text
r)

-- | Try to fetch the rest of 'Text'' stream.
tryFetchRest :: Text' -> Maybe (Text', Text)
tryFetchRest :: Text' -> Maybe (Text', Text)
tryFetchRest (Text' Int
l Int
c Text
txt) =
  if Text -> Bool
T.null Text
txt
    then Maybe (Text', Text)
forall a. Maybe a
Nothing
    else (Text', Text) -> Maybe (Text', Text)
forall a. a -> Maybe a
Just (Int -> Int -> Text -> Text'
Text' Int
l Int
c Text
"", Text
txt)

-- | Fetch a span at 'Loc'.
fetchSpan :: Text' -> Loc -> (Text', Text)
fetchSpan :: Text' -> Loc -> (Text', Text)
fetchSpan Text'
txt (Loc Int
_ Int
_ Int
el Int
ec) = Text' -> Int -> Int -> (Text', Text)
reachLoc Text'
txt Int
el Int
ec

-- | Reach given line\/column location and return 'Text' that has been
-- traversed.
reachLoc ::
  Text' ->
  -- | Line number to reach
  Int ->
  -- | Column number to reach
  Int ->
  (Text', Text)
reachLoc :: Text' -> Int -> Int -> (Text', Text)
reachLoc txt :: Text'
txt@(Text' Int
_ Int
_ Text
original) Int
l Int
c =
  let chunk :: Text
chunk = (Text' -> Maybe (Char, Text')) -> Text' -> Text
forall a. (a -> Maybe (Char, a)) -> a -> Text
T.unfoldr Text' -> Maybe (Char, Text')
f Text'
txt
      f :: Text' -> Maybe (Char, Text')
f (Text' Int
l' Int
c' Text
s) = do
        Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Int
l' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
l Bool -> Bool -> Bool
|| Int
c' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
c)
        (Char
ch, Text
s') <- Text -> Maybe (Char, Text)
T.uncons Text
s
        let (Int
l'', Int
c'') = case Char
ch of
              Char
'\n' -> (Int
l' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1, Int
1)
              Char
'\t' -> (Int
l', Int
c' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
- ((Int
c' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`rem` Int
8))
              Char
_ -> (Int
l', Int
c' Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
        (Char, Text') -> Maybe (Char, Text')
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
ch, Int -> Int -> Text -> Text'
Text' Int
l'' Int
c'' Text
s')
   in (Int -> Int -> Text -> Text'
Text' Int
l Int
c (Int -> Text -> Text
T.drop (Text -> Int
T.length Text
chunk) Text
original), Text
chunk)

----------------------------------------------------------------------------
-- Pragmas detection

-- | Detect file header pragma.
isHeaderPragma :: Text -> Bool
isHeaderPragma :: Text -> Bool
isHeaderPragma Text
txt0 = Maybe () -> Bool
forall a. Maybe a -> Bool
isJust (Maybe () -> Bool) -> Maybe () -> Bool
forall a b. (a -> b) -> a -> b
$ do
  Text
txt1 <- Text -> Text
T.stripStart (Text -> Text) -> Maybe Text -> Maybe Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text -> Text -> Maybe Text
T.stripPrefix Text
"{-#" Text
txt0
  Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Text -> Text -> Bool
T.isPrefixOf Text
"LANGUAGE" Text
txt1 Bool -> Bool -> Bool
|| Text -> Text -> Bool
T.isPrefixOf Text
"OPTIONS_GHC" Text
txt1)

----------------------------------------------------------------------------
-- Language extensions

-- | Language extensions we enable by default.
enabledExts :: [Extension]
enabledExts :: [Extension]
enabledExts =
  [ Extension
ForeignFunctionInterface,
    Extension
InterruptibleFFI,
    Extension
CApiFFI,
    Extension
Arrows,
    Extension
TemplateHaskell,
    Extension
TemplateHaskellQuotes,
    Extension
ImplicitParams,
    Extension
OverloadedLabels,
    Extension
ExplicitForAll,
    Extension
BangPatterns,
    Extension
PatternSynonyms,
    Extension
MagicHash,
    Extension
RecursiveDo,
    Extension
UnicodeSyntax,
    Extension
UnboxedTuples,
    Extension
UnboxedSums,
    Extension
DatatypeContexts,
    Extension
TransformListComp,
    Extension
QuasiQuotes,
    Extension
LambdaCase,
    Extension
BinaryLiterals,
    Extension
NegativeLiterals,
    Extension
HexFloatLiterals,
    Extension
TypeApplications,
    Extension
StaticPointers,
    Extension
NumericUnderscores,
    Extension
StarIsType
  ]