module Text.Highlighting.Kate.Syntax.Python ( highlight, parseExpression, syntaxName, syntaxExtensions ) where
import Text.Highlighting.Kate.Definitions
import Text.Highlighting.Kate.Common
import Text.ParserCombinators.Parsec
import Data.List (nub)
import qualified Data.Set as Set
import Data.Map (fromList)
import Data.Maybe (fromMaybe)
-- | Name of the syntax definition implemented by this module.
syntaxName :: String
syntaxName = "Python"
-- | Semicolon-separated glob patterns (@*.py@, @*.pyw@); presumably the file
-- names this syntax definition should be applied to -- confirm against callers.
syntaxExtensions :: String
syntaxExtensions = "*.py;*.pyw"
-- | Run the whole-source parser over @input@, starting from 'startingState'.
--
-- Returns the parsed source lines on success, or the rendered ('show')
-- parse error on failure.
highlight :: String -> Either String [SourceLine]
highlight input =
  either (Left . show) Right (runParser parseSource startingState "source" input)
-- | Parse a single labeled token using the Python rules.
--
-- The language recorded in the parser state is switched to @"Python"@ for
-- the duration of the parse and set back to its previous value afterwards.
-- If no current context is available, a @"Normal"@ context is pushed first.
parseExpression :: GenParser Char SyntaxState LabeledSource
parseExpression = do
  outerLang <- synStLanguage `fmap` getState
  updateState (\s -> s { synStLanguage = "Python" })
  ctx <- currentContext <|> (pushContext "Normal" >> currentContext)
  labeled <- parseRules ctx
  -- Restore whatever language was active before we were called.
  updateState (\s -> s { synStLanguage = outerLang })
  return labeled
-- | Parse the complete input into normalized source lines.
--
-- The first line is peeked (without consuming it) and stored in the state as
-- the current line before line-by-line parsing starts.
parseSource = do
  firstLine <- lookAhead wholeLine
  updateState (\s -> s { synStCurrentLine = firstLine })
  parsedLines <- manyTill parseSourceLine eof
  return (map normalizeHighlighting parsedLines)
-- | Initial parser state: language @"Python"@ with a context stack holding
-- only @"Normal"@, an empty current line, zero characters parsed, a newline
-- as the previous character, case-sensitive matching, and no captures.
startingState =
  SyntaxState
    { synStContexts             = fromList [("Python", ["Normal"])]
    , synStLanguage             = "Python"
    , synStCurrentLine          = ""
    , synStCharsParsedInLine    = 0
    , synStPrevChar             = '\n'
    , synStCaseSensitive        = True
    , synStKeywordCaseSensitive = True
    , synStCaptures             = []
    }
-- | Parse one line: collect tokens until 'pEndLine' succeeds.
parseSourceLine = parseExpressionInternal `manyTill` pEndLine
-- | Match the end of a line (a newline, or end of input) and prepare the
-- state for the next line.
--
-- No context changes happen at a line end: every context of this syntax keeps
-- the context stack as it is. The current context is still queried, so this
-- parser fails whenever 'currentContext' does.
pEndLine = do
  _ <- newline <|> (eof >> return '\n')
  _ <- currentContext
  upcoming <- lookAhead wholeLine
  -- Reset the per-line bookkeeping for the line that follows.
  updateState $ \s ->
    s { synStCurrentLine       = upcoming
      , synStCharsParsedInLine = 0
      , synStPrevChar          = '\n'
      }
-- | Label the matched text @txt@ with attribute @attr@ and record it in the
-- parser state.
--
-- Fails with @"Parser matched no text"@ when @txt@ is empty. Otherwise the
-- per-line character count is advanced by the length of @txt@, the previous
-- character becomes the last character of @txt@, and the result pairs @txt@
-- with the de-duplicated labels @[style, attr]@, where @style@ is looked up
-- in 'styles' (defaulting to @""@ when @attr@ has no entry).
withAttribute attr txt
  | null txt  = fail "Parser matched no text"
  | otherwise = do
      updateState $ \s ->
        s { synStCharsParsedInLine = synStCharsParsedInLine s + length txt
          , synStPrevChar          = last txt  -- safe: txt is non-empty here
          }
      return (nub [style, attr], txt)
  where
    style = fromMaybe "" (lookup attr styles)
-- | Association list from attribute name (as passed to 'withAttribute') to
-- the style label emitted alongside it.
styles =
  [ ("Normal Text", "Normal")
  , ("Definition Keyword", "Keyword")
  , ("Operator", "Normal")
  , ("String Substitution", "Normal")
  , ("Command Keyword", "Keyword")
  , ("Flow Control Keyword", "Keyword")
  , ("Builtin Function", "DataType")
  , ("Special Variable", "Others")
  , ("Extensions", "Others")
  , ("Preprocessor", "Char")
  , ("String Char", "Char")
  , ("Long", "Others")
  , ("Float", "Float")
  , ("Int", "DecVal")
  , ("Hex", "Others")
  , ("Octal", "Others")
  , ("Complex", "Others")
  , ("Comment", "Comment")
  , ("String", "String")
  , ("Raw String", "String")
  ]
-- | Parse one token in the current context.
--
-- The context's own rules are tried first; if none applies, 'pDefault' is
-- used and its text is labeled with the context's entry in
-- 'defaultAttributes' (or @""@ when the context has no entry).
parseExpressionInternal = do
  ctx <- currentContext
  let fallbackAttr = fromMaybe "" (lookup ctx defaultAttributes)
  parseRules ctx <|> (withAttribute fallbackAttr =<< pDefault)
-- Keyword sets consulted by the "Normal" context rules. Each set is built by
-- splitting a space-separated word list.

-- | Words labeled "Preprocessor".
list_prep = Set.fromList (words "import from as")

-- | Words labeled "Definition Keyword".
list_defs = Set.fromList (words "class def del global lambda")

-- | Words labeled "Operator".
list_operators = Set.fromList (words "and assert in is not or")

-- | Words labeled "Command Keyword".
list_commands = Set.fromList (words "exec print")

-- | Words labeled "Flow Control Keyword".
list_flow = Set.fromList (words "break continue elif else except finally for if pass raise return try while yield")

-- | Words labeled "Builtin Function".
list_builtinfuncs = Set.fromList (words "__import__ abs all any apply basestring bool buffer callable chr classmethod cmp coerce compile complex delattr dict dir divmod enumerate eval execfile file filter float frozenset getattr globals hasattr hash hex id input int intern isinstance issubclass iter len list locals long map max min object oct open ord pow property range raw_input reduce reload repr reversed round set setattr slice sorted staticmethod str sum super tuple type unichr unicode vars xrange zip")

-- | Words labeled "Special Variable".
list_specialvars = Set.fromList (words "None self True False NotImplemented Ellipsis")

-- | Words labeled "Extensions".
list_bindings = Set.fromList (words "SIGNAL SLOT connect")
-- Compiled regular expressions used by the context rules below. Each binding
-- name spells out its own pattern, with every non-alphanumeric character
-- written as an apostrophe followed by its two-digit hex code (e.g. '5b is
-- '[' and '2d is '-').

-- Identifier of at least two characters: a letter or underscore followed by
-- one or more letters, digits or underscores.
regex_'5ba'2dzA'2dZ'5f'5d'5ba'2dzA'2dZ'5f0'2d9'5d'2b = compileRegex "[a-zA-Z_][a-zA-Z_0-9]+"
-- Complex literal: a float, exponent form or plain digit run ending in j/J.
-- NOTE(review): the pattern starts with a literal space, so a match includes
-- that space -- confirm this is intended.
regex__'28'28'28'5b0'2d9'5d'2a'5c'2e'5b0'2d9'5d'2b'7c'5b0'2d9'5d'2b'5c'2e'29'7c'28'5b0'2d9'5d'2b'7c'28'5b0'2d9'5d'2a'5c'2e'5b0'2d9'5d'2b'7c'5b0'2d9'5d'2b'5c'2e'29'29'5beE'5d'28'5c'2b'7c'2d'29'3f'5b0'2d9'5d'2b'29'7c'5b0'2d9'5d'2b'29'5bjJ'5d = compileRegex " ((([0-9]*\\.[0-9]+|[0-9]+\\.)|([0-9]+|([0-9]*\\.[0-9]+|[0-9]+\\.))[eE](\\+|-)?[0-9]+)|[0-9]+)[jJ]"
-- Float literal: digits containing a decimal point, with an optional
-- exponent (the exponent accepts no sign).
regex_'28'5b0'2d9'5d'2b'5c'2e'5b0'2d9'5d'2a'7c'5c'2e'5b0'2d9'5d'2b'29'28'5beE'5d'5b0'2d9'5d'2b'29'3f = compileRegex "([0-9]+\\.[0-9]*|\\.[0-9]+)([eE][0-9]+)?"
-- Integer literal: a digit run not starting with 0 (optionally followed by
-- an exponent), or a single 0.
regex_'28'5b1'2d9'5d'5b0'2d9'5d'2a'28'5beE'5d'5b0'2d9'5d'2b'29'3f'7c0'29 = compileRegex "([1-9][0-9]*([eE][0-9]+)?|0)"
-- Long literal: a digit run not starting with 0, an optional exponent whose
-- digits may include '.', and a trailing L or l.
regex_'5b1'2d9'5d'5b0'2d9'5d'2a'28'5beE'5d'5b0'2d9'2e'5d'2b'29'3f'5bLl'5d = compileRegex "[1-9][0-9]*([eE][0-9.]+)?[Ll]"
-- Hexadecimal literal: 0x or 0X followed by one or more hex digits.
regex_0'5bXx'5d'5b0'2d9a'2dfA'2dF'5d'2b = compileRegex "0[Xx][0-9a-fA-F]+"
-- Octal-style literal: 0, then a non-zero digit, then any digits (the
-- character classes also accept 8 and 9).
regex_0'5b1'2d9'5d'5b0'2d9'5d'2a = compileRegex "0[1-9][0-9]*"
-- r or R followed by three apostrophes.
regex_'5brR'5d'27'27'27 = compileRegex "[rR]'''"
-- r or R followed by three double quotes.
regex_'5brR'5d'22'22'22 = compileRegex "[rR]\"\"\""
-- r or R followed by one apostrophe.
regex_'5brR'5d'27 = compileRegex "[rR]'"
-- r or R followed by one double quote.
regex_'5brR'5d'22 = compileRegex "[rR]\""
-- '#' and everything after it up to the end of the line.
regex_'23'2e'2a'24 = compileRegex "#.*$"
-- Optional whitespace followed by three apostrophes.
regex_'5cs'2a'27'27'27 = compileRegex "\\s*'''"
-- Optional whitespace followed by three double quotes.
regex_'5cs'2a'22'22'22 = compileRegex "\\s*\"\"\""
-- A single operator character from the listed class (includes '%' and ';').
regex_'5b'2b'2a'2f'25'5c'7c'3d'3b'5c'21'3c'3e'21'5e'26'7e'2d'5d = compileRegex "[+*/%\\|=;\\!<>!^&~-]"
-- '%' followed by one letter.
regex_'25'5ba'2dzA'2dZ'5d = compileRegex "%[a-zA-Z]"
-- Three double quotes.
regex_'22'22'22 = compileRegex "\"\"\""
-- '%', a parenthesised name of letters/digits/underscores, then one letter.
regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d = compileRegex "%\\([a-zA-Z0-9_]+\\)[a-zA-Z]"
-- Three apostrophes.
regex_'27'27'27 = compileRegex "'''"
-- | Association list from context name to the attribute used for text that
-- none of the context's rules matched (see 'parseExpressionInternal').
defaultAttributes =
  [ ("Normal", "Normal Text")
  , ("parenthesised", "Normal Text")
  , ("Tripple A-comment", "Comment")
  , ("Tripple Q-comment", "Comment")
  , ("Tripple A-string", "String")
  , ("Raw Tripple A-string", "Raw String")
  , ("Tripple Q-string", "String")
  , ("Raw Tripple Q-string", "Raw String")
  , ("Single A-comment", "Comment")
  , ("Single Q-comment", "Comment")
  , ("Single A-string", "String")
  , ("Single Q-string", "String")
  , ("Raw A-string", "Raw String")
  , ("Raw Q-string", "Raw String")
  ]
-- Rules for the top-level "Normal" context. Alternatives are tried in the
-- order written and the first one that succeeds wins, so more specific
-- patterns must come before more general ones:
--
--   * Long, Hex and Octal literals are tried before Int. The Int pattern
--     accepts a prefix of every string those three accept (a leading "0", or
--     the digits before an "L" suffix), so with Int first they could never
--     match.
--   * Triple-quoted openers are tried before their single-quote forms, and
--     raw-string prefixes before plain quotes.
--
-- Plain identifiers are labeled "Normal Text", the attribute that has an
-- entry in 'styles'.
--
-- NOTE(review): the final "String Substitution" rule cannot fire, because
-- the operator rule just before it already accepts '%'. The order is left
-- unchanged, since '%' outside a string is an operator.
parseRules "Normal" =
      kw list_prep "Preprocessor"
  <|> kw list_defs "Definition Keyword"
  <|> kw list_operators "Operator"
  <|> kw list_commands "Command Keyword"
  <|> kw list_flow "Flow Control Keyword"
  <|> kw list_builtinfuncs "Builtin Function"
  <|> kw list_specialvars "Special Variable"
  <|> kw list_bindings "Extensions"
  <|> rx regex_'5ba'2dzA'2dZ'5f'5d'5ba'2dzA'2dZ'5f0'2d9'5d'2b "Normal Text"
  <|> rx regex__'28'28'28'5b0'2d9'5d'2a'5c'2e'5b0'2d9'5d'2b'7c'5b0'2d9'5d'2b'5c'2e'29'7c'28'5b0'2d9'5d'2b'7c'28'5b0'2d9'5d'2a'5c'2e'5b0'2d9'5d'2b'7c'5b0'2d9'5d'2b'5c'2e'29'29'5beE'5d'28'5c'2b'7c'2d'29'3f'5b0'2d9'5d'2b'29'7c'5b0'2d9'5d'2b'29'5bjJ'5d "Complex"
  <|> rx regex_'28'5b0'2d9'5d'2b'5c'2e'5b0'2d9'5d'2a'7c'5c'2e'5b0'2d9'5d'2b'29'28'5beE'5d'5b0'2d9'5d'2b'29'3f "Float"
  -- Suffix/prefix-qualified integer forms first, plain Int last.
  <|> rx regex_'5b1'2d9'5d'5b0'2d9'5d'2a'28'5beE'5d'5b0'2d9'2e'5d'2b'29'3f'5bLl'5d "Long"
  <|> rx regex_0'5bXx'5d'5b0'2d9a'2dfA'2dF'5d'2b "Hex"
  <|> rx regex_0'5b1'2d9'5d'5b0'2d9'5d'2a "Octal"
  <|> rx regex_'28'5b1'2d9'5d'5b0'2d9'5d'2a'28'5beE'5d'5b0'2d9'5d'2b'29'3f'7c0'29 "Int"
  <|> (rx regex_'5brR'5d'27'27'27 "Raw String" >>~ pushContext "Raw Tripple A-string")
  <|> (rx regex_'5brR'5d'22'22'22 "Raw String" >>~ pushContext "Raw Tripple Q-string")
  <|> (rx regex_'5brR'5d'27 "Raw String" >>~ pushContext "Raw A-string")
  <|> (rx regex_'5brR'5d'22 "Raw String" >>~ pushContext "Raw Q-string")
  <|> rx regex_'23'2e'2a'24 "Comment"
  -- A triple quote reached from column 0 (after optional whitespace) opens a
  -- comment context rather than a string context.
  <|> ((pColumn 0 >> rx regex_'5cs'2a'27'27'27 "Comment") >>~ pushContext "Tripple A-comment")
  <|> ((pColumn 0 >> rx regex_'5cs'2a'22'22'22 "Comment") >>~ pushContext "Tripple Q-comment")
  <|> ((pString False "'''" >>= withAttribute "String") >>~ pushContext "Tripple A-string")
  <|> ((pString False "\"\"\"" >>= withAttribute "String") >>~ pushContext "Tripple Q-string")
  <|> (ch '\'' "String" >>~ pushContext "Single A-string")
  <|> (ch '"' "String" >>~ pushContext "Single Q-string")
  <|> (ch '(' "Operator" >>~ pushContext "parenthesised")
  <|> (ch ')' "Operator" >>~ (popContext >> return ()))
  <|> rx regex_'5b'2b'2a'2f'25'5c'7c'3d'3b'5c'21'3c'3e'21'5e'26'7e'2d'5d "Operator"
  <|> rx regex_'25'5ba'2dzA'2dZ'5d "String Substitution"
  where
    -- Delimiter characters handed to pKeyword for every keyword set.
    delims = " \n\t.():!+,-<=>%&*/;?[]^{|}~\\"
    -- Keyword from the given set, labeled with the given attribute.
    kw set attr = pKeyword delims set >>= withAttribute attr
    -- Regex match, labeled with the given attribute.
    rx re attr = pRegExpr re >>= withAttribute attr
    -- Single character, labeled with the given attribute.
    ch c attr = pDetectChar False c >>= withAttribute attr
-- Inside parentheses the rules are exactly those of the "Normal" context.
parseRules "parenthesised" = parseRules "Normal"
-- Inside a '''-delimited comment: the only rule is the closing ''' which is
-- labeled "Comment" and pops the context.
parseRules "Tripple A-comment" =
  (withAttribute "Comment" =<< pString False "'''") >>~ (popContext >> return ())
-- Inside a """-delimited comment: whatever pHlCChar accepts is labeled
-- "Comment"; the closing """ is labeled "Comment" and pops the context.
parseRules "Tripple Q-comment" =
      (withAttribute "Comment" =<< pHlCChar)
  <|> ((withAttribute "Comment" =<< pRegExpr regex_'22'22'22) >>~ (popContext >> return ()))
-- Inside a '''-delimited string: escape sequences, %(name)x and %x
-- substitutions, then the closing ''' which pops the context.
parseRules "Tripple A-string" =
      (withAttribute "String Char" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "String" =<< pRegExpr regex_'27'27'27) >>~ (popContext >> return ()))
-- Inside a raw '''-delimited string: escape sequences keep the "Raw String"
-- label; %(name)x and %x substitutions are marked; the closing ''' is
-- labeled "String" and pops the context.
parseRules "Raw Tripple A-string" =
      (withAttribute "Raw String" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "String" =<< pRegExpr regex_'27'27'27) >>~ (popContext >> return ()))
-- Inside a """-delimited string: escape sequences, %(name)x and %x
-- substitutions, then the closing """ which pops the context.
parseRules "Tripple Q-string" =
      (withAttribute "String Char" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "String" =<< pRegExpr regex_'22'22'22) >>~ (popContext >> return ()))
-- Inside a raw """-delimited string: escape sequences keep the "Raw String"
-- label; %(name)x and %x substitutions are marked; the closing """ is
-- labeled "String" and pops the context.
parseRules "Raw Tripple Q-string" =
      (withAttribute "Raw String" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "String" =<< pRegExpr regex_'22'22'22) >>~ (popContext >> return ()))
-- Inside an apostrophe-delimited comment: escape sequences are labeled
-- "Comment"; the closing apostrophe is labeled "Comment" and pops the context.
parseRules "Single A-comment" =
      (withAttribute "Comment" =<< pHlCStringChar)
  <|> ((withAttribute "Comment" =<< pDetectChar False '\'') >>~ (popContext >> return ()))
-- Inside a double-quote-delimited comment: escape sequences are labeled
-- "Comment"; the closing quote is labeled "Comment" and pops the context.
parseRules "Single Q-comment" =
      (withAttribute "Comment" =<< pHlCStringChar)
  <|> ((withAttribute "Comment" =<< pDetectChar False '"') >>~ (popContext >> return ()))
-- Inside an apostrophe-delimited string: escape sequences, %(name)x and %x
-- substitutions, then the closing apostrophe which pops the context.
parseRules "Single A-string" =
      (withAttribute "String Char" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "String" =<< pDetectChar False '\'') >>~ (popContext >> return ()))
-- Inside a double-quote-delimited string: escape sequences, %(name)x and %x
-- substitutions, then the closing quote which pops the context.
parseRules "Single Q-string" =
      (withAttribute "String Char" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "String" =<< pDetectChar False '"') >>~ (popContext >> return ()))
-- Inside a raw apostrophe-delimited string: escape sequences keep the
-- "Raw String" label; %(name)x and %x substitutions are marked; the closing
-- apostrophe is labeled "Raw String" and pops the context.
parseRules "Raw A-string" =
      (withAttribute "Raw String" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "Raw String" =<< pDetectChar False '\'') >>~ (popContext >> return ()))
-- Inside a raw double-quote-delimited string: escape sequences keep the
-- "Raw String" label; %(name)x and %x substitutions are marked; the closing
-- quote is labeled "Raw String" and pops the context.
parseRules "Raw Q-string" =
      (withAttribute "Raw String" =<< pHlCStringChar)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5c'28'5ba'2dzA'2dZ0'2d9'5f'5d'2b'5c'29'5ba'2dzA'2dZ'5d)
  <|> (withAttribute "String Substitution" =<< pRegExpr regex_'25'5ba'2dzA'2dZ'5d)
  <|> ((withAttribute "Raw String" =<< pDetectChar False '"') >>~ (popContext >> return ()))
-- Any context name not handled by an earlier clause: fail, naming the
-- offending context. The trailing space keeps the name separate from the
-- message text (it previously read e.g. "Unknown contextFoo").
parseRules x = fail $ "Unknown context " ++ x