module Haddock.Backends.Hyperlinker.Parser (parse) where
import Data.Either ( isRight, isLeft )
import Data.List ( foldl', isPrefixOf, isSuffixOf )
import Data.Maybe ( maybeToList )
import GHC ( DynFlags, addSourceToTokens )
import SrcLoc
import FastString ( mkFastString )
import StringBuffer ( stringToStringBuffer )
import Lexer ( Token(..) )
import qualified Lexer as L
import Haddock.Backends.Hyperlinker.Types as T
parse :: DynFlags -> FilePath -> String -> [T.Token]
parse dflags fp s = ghcToks (processCPP dflags fp s)
processCPP :: DynFlags
-> FilePath
-> String
-> [(Located L.Token, String)]
processCPP dflags fpath s = addSrc . go start . splitCPP $ s
where
start = mkRealSrcLoc (mkFastString fpath) 1 1
addSrc = addSourceToTokens start (stringToStringBuffer s)
go :: RealSrcLoc -> [Either String String] -> [Located L.Token]
go _ [] = []
go pos ls =
let (hLinesRight, ls') = span isRight ls
(cppLinesLeft, rest) = span isLeft ls'
hSrc = concat [ hLine | Right hLine <- hLinesRight ]
cppSrc = concat [ cppLine | Left cppLine <- cppLinesLeft ]
in case L.lexTokenStream (stringToStringBuffer hSrc) pos dflags of
L.PFailed _ _ss _msg ->
let (src_pos, failed) = mkToken ITunknown pos hSrc
(new_pos, cpp) = mkToken ITlineComment src_pos cppSrc
in failed : cpp : go new_pos rest
L.POk ss toks ->
let (new_pos, cpp) = mkToken ITlineComment (L.loc ss) cppSrc
in toks ++ [cpp] ++ go new_pos rest
mkToken tok start' str =
let end = foldl' advanceSrcLoc start' str
in (end, L (RealSrcSpan $ mkRealSrcSpan start' end) (tok str))
splitCPP :: String -> [Either String String]
splitCPP "" = []
splitCPP s | isCPPline s = Left l : splitCPP rest
| otherwise = Right l : splitCPP rest
where
~(l, rest) = spanToNewline 0 s
isCPPline :: String -> Bool
isCPPline = isPrefixOf "#" . dropWhile (`elem` " \t") . take 5
spanToNewline :: Int
-> String
-> (String, String)
spanToNewline _ [] = ([], [])
spanToNewline n ('\n':str) | n <= 0 = ("\n", str)
spanToNewline n ('\\':'\n':str) =
let (str', rest) = spanToNewline n str
in ('\\':'\n':str', rest)
spanToNewline n ('{':'-':str) =
let (str', rest) = spanToNewline (n+1) str
in ('{':'-':str', rest)
spanToNewline n ('-':'}':str) =
let (str', rest) = spanToNewline (n-1) str
in ('-':'}':str', rest)
spanToNewline n (c:str) =
let (str', rest) = spanToNewline n str
in (c:str', rest)
ghcToks :: [(Located L.Token, String)] -> [T.Token]
ghcToks = reverse . (\(_,ts,_) -> ts) . foldl' go (start, [], False)
where
start = mkRealSrcLoc (mkFastString "lexing") 1 1
go :: (RealSrcLoc, [T.Token], Bool)
-> (Located L.Token, String)
-> (RealSrcLoc, [T.Token], Bool)
go (pos, toks, in_prag) (L l tok, raw) =
( next_pos
, classifiedTok ++ maybeToList white ++ toks
, inPragma in_prag tok
)
where
(next_pos, white) = mkWhitespace pos l
classifiedTok = [ Token (classify' tok) raw rss
| RealSrcSpan rss <- [l]
, not (null raw)
]
classify' | in_prag = const TkPragma
| otherwise = classify
mkWhitespace :: RealSrcLoc -> SrcSpan -> (RealSrcLoc, Maybe T.Token)
mkWhitespace prev spn =
case spn of
UnhelpfulSpan _ -> (prev,Nothing)
RealSrcSpan s | null wsstring -> (end, Nothing)
| otherwise -> (end, Just (Token TkSpace wsstring wsspan))
where
start = realSrcSpanStart s
end = realSrcSpanEnd s
wsspan = mkRealSrcSpan prev start
nls = srcLocLine start - srcLocLine prev
spaces = if nls == 0 then srcLocCol start - srcLocCol prev
else srcLocCol start - 1
wsstring = replicate nls '\n' ++ replicate spaces ' '
classify :: L.Token -> TokenType
classify tok =
case tok of
ITas -> TkKeyword
ITcase -> TkKeyword
ITclass -> TkKeyword
ITdata -> TkKeyword
ITdefault -> TkKeyword
ITderiving -> TkKeyword
ITdo -> TkKeyword
ITelse -> TkKeyword
IThiding -> TkKeyword
ITforeign -> TkKeyword
ITif -> TkKeyword
ITimport -> TkKeyword
ITin -> TkKeyword
ITinfix -> TkKeyword
ITinfixl -> TkKeyword
ITinfixr -> TkKeyword
ITinstance -> TkKeyword
ITlet -> TkKeyword
ITmodule -> TkKeyword
ITnewtype -> TkKeyword
ITof -> TkKeyword
ITqualified -> TkKeyword
ITthen -> TkKeyword
ITtype -> TkKeyword
ITwhere -> TkKeyword
ITforall {} -> TkKeyword
ITexport -> TkKeyword
ITlabel -> TkKeyword
ITdynamic -> TkKeyword
ITsafe -> TkKeyword
ITinterruptible -> TkKeyword
ITunsafe -> TkKeyword
ITstdcallconv -> TkKeyword
ITccallconv -> TkKeyword
ITcapiconv -> TkKeyword
ITprimcallconv -> TkKeyword
ITjavascriptcallconv -> TkKeyword
ITmdo -> TkKeyword
ITfamily -> TkKeyword
ITrole -> TkKeyword
ITgroup -> TkKeyword
ITby -> TkKeyword
ITusing -> TkKeyword
ITpattern -> TkKeyword
ITstatic -> TkKeyword
ITstock -> TkKeyword
ITanyclass -> TkKeyword
ITunit -> TkKeyword
ITsignature -> TkKeyword
ITdependency -> TkKeyword
ITrequires -> TkKeyword
ITinline_prag {} -> TkPragma
ITspec_prag {} -> TkPragma
ITspec_inline_prag {} -> TkPragma
ITsource_prag {} -> TkPragma
ITrules_prag {} -> TkPragma
ITwarning_prag {} -> TkPragma
ITdeprecated_prag {} -> TkPragma
ITline_prag {} -> TkPragma
ITcolumn_prag {} -> TkPragma
ITscc_prag {} -> TkPragma
ITgenerated_prag {} -> TkPragma
ITcore_prag {} -> TkPragma
ITunpack_prag {} -> TkPragma
ITnounpack_prag {} -> TkPragma
ITann_prag {} -> TkPragma
ITcomplete_prag {} -> TkPragma
ITclose_prag -> TkPragma
IToptions_prag {} -> TkPragma
ITinclude_prag {} -> TkPragma
ITlanguage_prag -> TkPragma
ITvect_prag {} -> TkPragma
ITvect_scalar_prag {} -> TkPragma
ITnovect_prag {} -> TkPragma
ITminimal_prag {} -> TkPragma
IToverlappable_prag {} -> TkPragma
IToverlapping_prag {} -> TkPragma
IToverlaps_prag {} -> TkPragma
ITincoherent_prag {} -> TkPragma
ITctype {} -> TkPragma
ITdotdot -> TkGlyph
ITcolon -> TkGlyph
ITdcolon {} -> TkGlyph
ITequal -> TkGlyph
ITlam -> TkGlyph
ITlcase -> TkGlyph
ITvbar -> TkGlyph
ITlarrow {} -> TkGlyph
ITrarrow {} -> TkGlyph
ITat -> TkGlyph
ITtilde -> TkGlyph
ITtildehsh -> TkGlyph
ITdarrow {} -> TkGlyph
ITminus -> TkGlyph
ITbang -> TkGlyph
ITdot -> TkOperator
ITtypeApp -> TkGlyph
ITbiglam -> TkGlyph
ITocurly -> TkSpecial
ITccurly -> TkSpecial
ITvocurly -> TkSpecial
ITvccurly -> TkSpecial
ITobrack -> TkSpecial
ITopabrack -> TkSpecial
ITcpabrack -> TkSpecial
ITcbrack -> TkSpecial
IToparen -> TkSpecial
ITcparen -> TkSpecial
IToubxparen -> TkSpecial
ITcubxparen -> TkSpecial
ITsemi -> TkSpecial
ITcomma -> TkSpecial
ITunderscore -> TkIdentifier
ITbackquote -> TkSpecial
ITsimpleQuote -> TkSpecial
ITvarid {} -> TkIdentifier
ITconid {} -> TkIdentifier
ITvarsym {} -> TkOperator
ITconsym {} -> TkOperator
ITqvarid {} -> TkIdentifier
ITqconid {} -> TkIdentifier
ITqvarsym {} -> TkOperator
ITqconsym {} -> TkOperator
ITdupipvarid {} -> TkUnknown
ITlabelvarid {} -> TkUnknown
ITchar {} -> TkChar
ITstring {} -> TkString
ITinteger {} -> TkNumber
ITrational {} -> TkNumber
ITprimchar {} -> TkChar
ITprimstring {} -> TkString
ITprimint {} -> TkNumber
ITprimword {} -> TkNumber
ITprimfloat {} -> TkNumber
ITprimdouble {} -> TkNumber
ITopenExpQuote {} -> TkSpecial
ITopenPatQuote -> TkSpecial
ITopenDecQuote -> TkSpecial
ITopenTypQuote -> TkSpecial
ITcloseQuote {} -> TkSpecial
ITopenTExpQuote {} -> TkSpecial
ITcloseTExpQuote -> TkSpecial
ITidEscape {} -> TkUnknown
ITparenEscape -> TkSpecial
ITidTyEscape {} -> TkUnknown
ITparenTyEscape -> TkSpecial
ITtyQuote -> TkSpecial
ITquasiQuote {} -> TkUnknown
ITqQuasiQuote {} -> TkUnknown
ITproc -> TkKeyword
ITrec -> TkKeyword
IToparenbar {} -> TkGlyph
ITcparenbar {} -> TkGlyph
ITlarrowtail {} -> TkGlyph
ITrarrowtail {} -> TkGlyph
ITLarrowtail {} -> TkGlyph
ITRarrowtail {} -> TkGlyph
ITunknown {} -> TkUnknown
ITeof -> TkUnknown
ITlineComment s
| isCPPline s -> TkCpp
| otherwise -> TkComment
ITdocCommentNext {} -> TkComment
ITdocCommentPrev {} -> TkComment
ITdocCommentNamed {} -> TkComment
ITdocSection {} -> TkComment
ITdocOptions {} -> TkComment
ITblockComment c
| isPrefixOf "{-#" c
, isSuffixOf "#-}" c -> TkPragma
| otherwise -> TkComment
inPragma :: Bool
-> L.Token
-> Bool
inPragma _ ITclose_prag = False
inPragma True _ = True
inPragma False tok =
case tok of
ITinline_prag {} -> True
ITspec_prag {} -> True
ITspec_inline_prag {} -> True
ITsource_prag {} -> True
ITrules_prag {} -> True
ITwarning_prag {} -> True
ITdeprecated_prag {} -> True
ITline_prag {} -> True
ITcolumn_prag {} -> True
ITscc_prag {} -> True
ITgenerated_prag {} -> True
ITcore_prag {} -> True
ITunpack_prag {} -> True
ITnounpack_prag {} -> True
ITann_prag {} -> True
ITcomplete_prag {} -> True
IToptions_prag {} -> True
ITinclude_prag {} -> True
ITlanguage_prag -> True
ITvect_prag {} -> True
ITvect_scalar_prag {} -> True
ITnovect_prag {} -> True
ITminimal_prag {} -> True
IToverlappable_prag {} -> True
IToverlapping_prag {} -> True
IToverlaps_prag {} -> True
ITincoherent_prag {} -> True
ITctype {} -> True
_ -> False