module Text.Highlighting.Kate.Syntax.Fsharp ( highlight, parseExpression, syntaxName, syntaxExtensions ) where
import Text.Highlighting.Kate.Definitions
import Text.Highlighting.Kate.Common
import Text.ParserCombinators.Parsec
import Control.Monad (when)
import Data.Map (fromList)
import Data.Maybe (fromMaybe, maybeToList)
import qualified Data.Set as Set
-- | Name of the language handled by this module; the same string is used
-- as the language tag in the parser state ('startingState', 'parseExpression').
syntaxName :: String
syntaxName = "FSharp"
-- | Semicolon-separated glob patterns for the file names associated with
-- this syntax definition.
syntaxExtensions :: String
syntaxExtensions = "*.fs;*.fsi;*.fsx"
-- | Highlight a complete source text.
--
-- Runs 'parseSource' from 'startingState' over the input.  A parse error is
-- rendered with 'show' and returned in 'Left'; otherwise the highlighted
-- lines are returned in 'Right'.
highlight :: String -> Either String [SourceLine]
highlight input =
  either (Left . show) Right $
    runParser parseSource startingState "source" input
-- | Parse one labeled token using the F# rules.
--
-- The language recorded in the parser state is switched to @"FSharp"@ for
-- the duration of the parse and restored to its previous value afterwards.
-- If no context is current, the @"Normal"@ context is pushed first.
parseExpression :: GenParser Char SyntaxState LabeledSource
parseExpression = do
  saved <- getState
  let callerLang = synStLanguage saved
  setState saved { synStLanguage = "FSharp" }
  ctx <- currentContext <|> (pushContext "Normal" >> currentContext)
  labeled <- parseRules ctx
  -- Hand the state back with the caller's language tag.
  updateState (\s -> s { synStLanguage = callerLang })
  return labeled
-- | Parse the whole input into highlighted lines.
--
-- The first line is peeked at (without consuming it) and stored as the
-- current line in the parser state; then source lines are parsed until end
-- of input and each one is passed through 'normalizeHighlighting'.
parseSource = do
  firstLine <- lookAhead wholeLine
  updateState (\s -> s { synStCurrentLine = firstLine })
  parsedLines <- parseSourceLine `manyTill` eof
  return (map normalizeHighlighting parsedLines)
-- | Parser state used by 'highlight': the @"FSharp"@ language starts in the
-- @"Normal"@ context, nothing has been parsed on the current line, the
-- previous character is a newline, and matching is case sensitive.
startingState = SyntaxState
  { synStContexts             = fromList [("FSharp", ["Normal"])]
  , synStLanguage             = "FSharp"
  , synStCurrentLine          = ""
  , synStCharsParsedInLine    = 0
  , synStPrevChar             = '\n'
  , synStCaseSensitive        = True
  , synStKeywordCaseSensitive = True
  , synStCaptures             = []
  }
-- | Parse one source line: tokens are collected until 'pEndLine' succeeds.
parseSourceLine = parseExpressionInternal `manyTill` pEndLine
-- | End-of-line handling.
--
-- Succeeds only when the next character is a newline or the input is at its
-- end (checked with 'lookAhead', so nothing is consumed by the check).
-- The "Singleline Comment" context ends with the line: it is popped and the
-- end-of-line handling is re-run for the context underneath.  Every other
-- context stays active across the line break and defers to 'pHandleEndLine'.
pEndLine = do
  lookAhead $ newline <|> (eof >> return '\n')
  context <- currentContext
  if context == "Singleline Comment"
    then popContext >> pEndLine
    else pHandleEndLine
-- | Label a piece of matched text with an attribute.
--
-- Fails with @"Parser matched no text"@ when the text is empty.  Otherwise
-- the result pairs the text with the attribute name followed by its short
-- style code from 'styles' (when one exists), and the parser state is
-- updated: the per-line character count grows by the length of the text and
-- the previous character becomes the last character of the text.
withAttribute attr txt = do
  when (null txt) $ fail "Parser matched no text"
  st <- getState
  let labs = attr : maybeToList (lookup attr styles)
      consumed = synStCharsParsedInLine st + length txt
      -- Last character of the text, with a newline as the empty-text default.
      prevchar = case reverse txt of
        (c : _) -> c
        []      -> '\n'
  updateState $ \s ->
    s { synStCharsParsedInLine = consumed, synStPrevChar = prevchar }
  return (labs, txt)
-- | Association list from attribute names to short style codes.  Attributes
-- not listed here get no style code in 'withAttribute'.
styles =
  [ ("Keyword", "kw")
  , ("Core Data Type", "dt")
  , ("Decimal", "dv")
  , ("Hexadecimal", "bn")
  , ("Octal", "bn")
  , ("Binary", "bn")
  , ("Float", "fl")
  , ("Character", "ch")
  , ("String", "st")
  , ("Escaped characters", "ch")
  , ("Comment", "co")
  , ("Camlp4 Quotation", "st")
  , ("Directive", "ot")
  , ("Region Marker", "kw")
  ]
-- | Parse one token in the current context.
--
-- The context's own rules are tried first.  If none applies, 'pDefault' is
-- used and its text is labeled with the context's default attribute from
-- 'defaultAttributes' (the empty string when the context is not listed).
parseExpressionInternal = do
  context <- currentContext
  let fallbackAttr = fromMaybe "" (lookup context defaultAttributes)
  parseRules context <|> (pDefault >>= withAttribute fallbackAttr)
-- | General F# keywords, labeled "Keyword" by the "Normal" context rules.
--
-- Two entries were misspelled ("dowcast", "interfaece"), so the real
-- keywords @downcast@ and @interface@ were never highlighted; both are
-- spelled correctly here.
list_keywords = Set.fromList $ words $ "abstract and as assert base class delegate downcast downto elif else exception extern false for fun function functor global if in inherit inline interface internal lazy let match member mutable namespace new not null of or override private public rec ref return static then to true try type upcast use val void when while with yield"
-- | Names of built-in F# types, labeled "Core Data Type" by the "Normal"
-- context rules.
list_core_types = Set.fromList
  [ "bool", "byte", "sbyte"
  , "int16", "uint16", "int", "uint32", "int64", "uint64"
  , "nativeint", "unativeint"
  , "char", "string", "decimal", "unit", "void"
  , "float32", "single", "float", "double"
  , "bigint", "option", "seq"
  ]
-- Single-purpose keyword sets.  Each is matched by its own rule so that the
-- rule can attach a specific attribute or context switch to the word(s).
list_symbols = Set.singleton "?"
-- Closes the "Block", "Sig", "Struct" and "Object" contexts.
list_end = Set.singleton "end"
-- Opens the "Sig" context.
list_sig = Set.singleton "sig"
-- Opens the "Struct" context.
list_struct = Set.singleton "struct"
-- Opens the "Object" context.
list_object = Set.singleton "object"
-- Opens the "Block" context.
list_begin = Set.singleton "begin"
list_do = Set.singleton "do"
list_done = Set.singleton "done"
-- Words that open the "ModuleEnv" context.
list_module'5fenvironment = Set.fromList ["module", "open"]
-- Compiled regular expressions used by the context rules below.
--
-- Naming scheme: each binding is "regex_" followed by the pattern itself,
-- with every character that is not a letter or digit written as an
-- apostrophe plus its two-digit hexadecimal code (e.g. '5c is a backslash,
-- '2d is '-').  The \0300-\0326, \0330-\0366 and \0370-\0377 ranges in the
-- identifier character classes are presumably octal escapes for accented
-- Latin-1 letters -- TODO confirm against compileRegex.

-- A backquote, optional whitespace, then an identifier.
regex_'60'5cs'2a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a = compileRegex "`\\s*[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*"
-- A capitalized identifier followed by optional whitespace and a dot
-- (a qualified-name prefix).
regex_'5bA'2dZ'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'5cs'2a'5c'2e = compileRegex "[A-Z][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*\\s*\\."
-- A capitalized identifier.
regex_'5bA'2dZ'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a = compileRegex "[A-Z][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*"
-- '#' followed by an identifier and the rest of the line (a directive).
regex_'23'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'2e'2a'24 = compileRegex "#[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*.*$"
-- A character literal: a single non-quote character or one escape sequence
-- between single quotes.
regex_'27'28'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29'7c'5b'5e'27'5d'29'27 = compileRegex "'((\\\\[ntbr'\"\\\\]|\\\\[0-9]{3}|\\\\x[0-9A-Fa-f]{2})|[^'])'"
-- A named quotation opener: "<:" identifier "<".
regex_'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c = compileRegex "<:[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*<"
-- Any identifier (letter or underscore first).
regex_'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a = compileRegex "[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*"
-- Hexadecimal literal: optional '-', "0x"/"0X", hex digits or underscores.
regex_'2d'3f0'5bxX'5d'5b0'2d9A'2dFa'2df'5f'5d'2b = compileRegex "-?0[xX][0-9A-Fa-f_]+"
-- Octal literal: optional '-', "0o"/"0O", octal digits or underscores.
regex_'2d'3f0'5boO'5d'5b0'2d7'5f'5d'2b = compileRegex "-?0[oO][0-7_]+"
-- Binary literal: optional '-', "0b"/"0B", binary digits or underscores.
regex_'2d'3f0'5bbB'5d'5b01'5f'5d'2b = compileRegex "-?0[bB][01_]+"
-- Floating-point literal: digits followed by a fractional part and/or an
-- exponent (at least one of the two is required).
regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'28'28'5c'2e'28'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'3f'28'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'3f'29'7c'28'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'29 = compileRegex "-?[0-9][0-9_]*((\\.([0-9][0-9_]*)?([eE][-+]?[0-9][0-9_]*)?)|([eE][-+]?[0-9][0-9_]*))"
-- Decimal integer literal: optional '-', a digit, then digits or underscores.
regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a = compileRegex "-?[0-9][0-9_]*"
-- One escape sequence inside a string: \n-style, three decimal digits, or
-- \x plus two hex digits.
regex_'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29 = compileRegex "(\\\\[ntbr'\"\\\\]|\\\\[0-9]{3}|\\\\x[0-9A-Fa-f]{2})"
-- A backslash at the end of the line.
regex_'5c'5c'24 = compileRegex "\\\\$"
-- A backslash followed by a backslash, ">>" or "<<" (escapes in a quotation).
regex_'5c'5c'28'5c'5c'7c'3e'3e'7c'3c'3c'29 = compileRegex "\\\\(\\\\|>>|<<)"
-- A backslash followed by a named quotation opener ("<:" identifier "<").
regex_'5c'5c'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c = compileRegex "\\\\<:[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*<"
-- | Default attribute for each context; 'parseExpressionInternal' applies it
-- to text that none of the context's rules matched.
defaultAttributes =
  [ ("Normal", "Normal Text")
  , ("Singleline Comment", "Comment")
  , ("Multiline Comment", "Comment")
  , ("String Constant", "String")
  , ("Block", "Normal text")
  , ("Sig", "Normal text")
  , ("Struct", "Normal text")
  , ("Object", "Normal text")
  , ("ModuleEnv", "Normal text")
  , ("ModuleEnv2", "Normal text")
  , ("Camlp4 Quotation Constant", "Camlp4 Quotation")
  ]
-- Rules for the top-level "Normal" context, tried in order; the first rule
-- that matches wins.
--
-- Fix: the "//" opener is now labeled "Comment" (it was "Normal Text").  It
-- opens the "Singleline Comment" context, whose text is labeled "Comment",
-- so the marker was styled differently from the comment it starts -- unlike
-- the "(*" opener, which already used "Comment".
--
-- Order matters in several places: two-character symbols come before the
-- single '[' / ']' rules, the qualified-name lookahead comes before the
-- plain capitalized-identifier rule, keyword sets come before the generic
-- identifier rule, and the prefixed/float number forms come before the
-- plain decimal rule.
parseRules "Normal" =
  do (attr, result) <-
       (   ((pDetect2Chars False '(' '*' >>= withAttribute "Comment") >>~ pushContext "Multiline Comment")
       <|> ((pDetect2Chars False '/' '/' >>= withAttribute "Comment") >>~ pushContext "Singleline Comment")
       <|> symbol1 '('
       <|> symbol1 ')'
       <|> symbol1 '{'
       <|> symbol1 '}'
       <|> symbol2 '[' '<'
       <|> symbol2 '>' ']'
       <|> symbol2 '[' '|'
       <|> symbol2 '|' ']'
       <|> symbol1 '['
       <|> symbol1 ']'
       <|> keyword list_do "Keyword"
       <|> keyword list_done "Keyword"
       <|> (keyword list_module'5fenvironment "Directive" >>~ pushContext "ModuleEnv")
       <|> (keyword list_begin "Region Marker" >>~ pushContext "Block")
       <|> (keyword list_object "Region Marker" >>~ pushContext "Object")
       <|> (keyword list_sig "Region Marker" >>~ pushContext "Sig")
       <|> (keyword list_struct "Region Marker" >>~ pushContext "Struct")
       <|> (pRegExpr regex_'60'5cs'2a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a >>= withAttribute "Constructor")
           -- Lookahead only: consumes nothing, emits an empty token and
           -- switches to "ModuleEnv2" to handle the qualified name.
       <|> ((lookAhead (pRegExpr regex_'5bA'2dZ'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'5cs'2a'5c'2e) >> return ([], "")) >>~ pushContext "ModuleEnv2")
       <|> (pRegExpr regex_'5bA'2dZ'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a >>= withAttribute "Constructor")
       <|> (pFirstNonSpace >> pRegExpr regex_'23'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'2e'2a'24 >>= withAttribute "Directive")
       <|> ((pDetectChar False '"' >>= withAttribute "String") >>~ pushContext "String Constant")
       <|> (pRegExpr regex_'27'28'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29'7c'5b'5e'27'5d'29'27 >>= withAttribute "Character")
       <|> ((pDetect2Chars False '<' '<' >>= withAttribute "Camlp4 Quotation") >>~ pushContext "Camlp4 Quotation Constant")
       <|> ((pRegExpr regex_'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c >>= withAttribute "Camlp4 Quotation") >>~ pushContext "Camlp4 Quotation Constant")
       <|> keyword list_keywords "Keyword"
       <|> keyword list_core_types "Core Data Type"
       <|> (pRegExpr regex_'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a >>= withAttribute "Identifier")
       <|> (pRegExpr regex_'2d'3f0'5bxX'5d'5b0'2d9A'2dFa'2df'5f'5d'2b >>= withAttribute "Hexadecimal")
       <|> (pRegExpr regex_'2d'3f0'5boO'5d'5b0'2d7'5f'5d'2b >>= withAttribute "Octal")
       <|> (pRegExpr regex_'2d'3f0'5bbB'5d'5b01'5f'5d'2b >>= withAttribute "Binary")
       <|> (pRegExpr regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'28'28'5c'2e'28'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'3f'28'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'3f'29'7c'28'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'29 >>= withAttribute "Float")
       <|> (pRegExpr regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a >>= withAttribute "Decimal")
       )
     return (attr, result)
  where
    -- Characters that delimit keywords for pKeyword.
    delims = " \n\t.():!+,-<=>%&*/;?[]^{|}~\\"
    -- A one-character symbol.
    symbol1 c = pDetectChar False c >>= withAttribute "Symbol"
    -- A two-character symbol.
    symbol2 c d = pDetect2Chars False c d >>= withAttribute "Symbol"
    -- A word from the given set, labeled with the given attribute.
    keyword set attr = pKeyword delims set >>= withAttribute attr
-- "Singleline Comment" has no rules of its own: this always fails, so the
-- caller's default handling labels the text with the context's default
-- attribute; 'pEndLine' pops the context at the end of the line.
parseRules "Singleline Comment" = pzero
-- "Multiline Comment": "*)" closes the current comment, "(*" opens a nested
-- one (the same context is pushed again, so comments nest).
parseRules "Multiline Comment" =
      (commentMark '*' ')' >>~ popContext)
  <|> (commentMark '(' '*' >>~ pushContext "Multiline Comment")
  where
    commentMark a b = pDetect2Chars False a b >>= withAttribute "Comment"
-- "String Constant": a double quote closes the string; escape sequences and
-- a backslash at the end of a line are labeled "Escaped characters".
parseRules "String Constant" =
      ((pDetectChar False '"' >>= withAttribute "String") >>~ popContext)
  <|> escaped regex_'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29
  <|> escaped regex_'5c'5c'24
  where
    escaped re = pRegExpr re >>= withAttribute "Escaped characters"
-- "Block": the keyword "end" closes the block; anything else is handled by
-- the "Normal" rules.
parseRules "Block" =
      ((pKeyword " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" list_end >>= withAttribute "Region Marker") >>~ popContext)
  <|> parseRules "Normal"
-- "Sig": the keyword "end" closes the signature; anything else is handled
-- by the "Normal" rules.
parseRules "Sig" =
      ((pKeyword " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" list_end >>= withAttribute "Region Marker") >>~ popContext)
  <|> parseRules "Normal"
-- "Struct": the keyword "end" closes the structure; anything else is
-- handled by the "Normal" rules.
parseRules "Struct" =
      ((pKeyword " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" list_end >>= withAttribute "Region Marker") >>~ popContext)
  <|> parseRules "Normal"
-- "Object": the keyword "end" closes the object; anything else is handled
-- by the "Normal" rules.
parseRules "Object" =
      ((pKeyword " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" list_end >>= withAttribute "Region Marker") >>~ popContext)
  <|> parseRules "Normal"
-- "ModuleEnv" (entered after "module" / "open"): skips spaces, hands a
-- qualified name ("Name .") over to "ModuleEnv2" without consuming it,
-- labels a plain capitalized name as "Module" and leaves the context, and
-- otherwise just leaves the context with an empty token.
parseRules "ModuleEnv" =
      (pDetectSpaces >>= withAttribute "Normal text")
  <|> ((lookAhead (pRegExpr regex_'5bA'2dZ'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'5cs'2a'5c'2e) >> return ([], "")) >>~ pushContext "ModuleEnv2")
  <|> ((pRegExpr regex_'5bA'2dZ'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a >>= withAttribute "Module") >>~ popContext)
  <|> (popContext >> return ([], ""))
-- "ModuleEnv2" (one component of a qualified name): the capitalized name is
-- labeled "Module", spaces are passed through, and the dot is labeled
-- "Keyword" and ends the context.
parseRules "ModuleEnv2" =
      (pRegExpr regex_'5bA'2dZ'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a >>= withAttribute "Module")
  <|> (pDetectSpaces >>= withAttribute "Normal text")
  <|> ((pString False "." >>= withAttribute "Keyword") >>~ popContext)
-- "Camlp4 Quotation Constant": ">>" closes the quotation; "<<" or a named
-- opener ("<:" name "<") starts a nested one; backslash-escaped delimiters
-- are labeled "Escaped characters".
parseRules "Camlp4 Quotation Constant" =
      (quotation (pDetect2Chars False '>' '>') >>~ popContext)
  <|> (quotation (pDetect2Chars False '<' '<') >>~ pushContext "Camlp4 Quotation Constant")
  <|> (quotation (pRegExpr regex_'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c) >>~ pushContext "Camlp4 Quotation Constant")
  <|> escaped regex_'5c'5c'28'5c'5c'7c'3e'3e'7c'3c'3c'29
  <|> escaped regex_'5c'5c'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c
  where
    quotation p = p >>= withAttribute "Camlp4 Quotation"
    escaped re = pRegExpr re >>= withAttribute "Escaped characters"
-- Any context name without rules above is an error.  The message now has a
-- space between the fixed text and the context name (it used to read
-- "Unknown contextFoo").
parseRules x = fail $ "Unknown context " ++ x