module Text.Highlighting.Kate.Syntax.Xml ( highlight, parseExpression, syntaxName, syntaxExtensions ) where
import Text.Highlighting.Kate.Definitions
import Text.Highlighting.Kate.Common
import qualified Text.Highlighting.Kate.Syntax.Alert
import Text.ParserCombinators.Parsec
import Control.Monad (when)
import Data.Map (fromList)
import Data.Maybe (fromMaybe, maybeToList)
-- | Name of the language handled by this module. The same string is used
-- below as the 'synStLanguage' value and as the key of the context map in
-- 'startingState'.
syntaxName :: String
syntaxName = "XML"
-- | Semicolon-separated list of filename glob patterns associated with this
-- syntax definition.
syntaxExtensions :: String
syntaxExtensions = foldr1 joinGlob globs
  where
    -- Join two entries with the ';' separator used by the original string.
    joinGlob g rest = g ++ ';' : rest
    globs =
      [ "*.docbook"
      , "*.xml"
      , "*.rc"
      , "*.daml"
      , "*.rdf"
      , "*.rss"
      , "*.xspf"
      , "*.xsd"
      , "*.svg"
      , "*.ui"
      , "*.kcfg"
      , "*.qrc"
      , "*.wsdl"
      ]
-- | Highlight a complete source text.
--
-- Runs 'parseSource' over @input@ starting from 'startingState' (source name
-- @"source"@). A parse error is rendered with 'show' and returned in 'Left';
-- a successful parse is returned unchanged in 'Right'.
highlight :: String -> Either String [SourceLine]
highlight input =
  either (Left . show) Right $
    runParser parseSource startingState "source" input
-- | Parse a single labeled token using the XML rules.
--
-- The language recorded in the parser state is switched to @"XML"@ for the
-- duration of the parse and set back to its previous value afterwards. If no
-- current context is available, the @"Start"@ context is pushed first.
parseExpression :: GenParser Char SyntaxState LabeledSource
parseExpression = do
  callerLanguage <- getState >>= return . synStLanguage
  updateState $ \s -> s { synStLanguage = "XML" }
  context <- currentContext <|> (pushContext "Start" >> currentContext)
  labeled <- parseRules context
  -- Restore whatever language was active before this parser ran.
  updateState $ \s -> s { synStLanguage = callerLanguage }
  return labeled
-- | Parse the whole input into highlighted lines.
--
-- The first line is peeked (without consuming it) and stored in
-- 'synStCurrentLine' before any line is parsed; every parsed line is then
-- passed through 'normalizeHighlighting'.
parseSource = do
  firstLine <- lookAhead wholeLine
  updateState $ \s -> s { synStCurrentLine = firstLine }
  manyTill parseSourceLine eof >>= return . map normalizeHighlighting
-- | Parser state used by 'highlight' at the start of a document: language
-- @"XML"@ with a context stack holding only @"Start"@, nothing parsed yet on
-- the current line, and a newline as the previous character.
startingState =
  SyntaxState
    { synStLanguage = "XML"
    , synStContexts = fromList [("XML", ["Start"])]
    , synStCurrentLine = ""
    , synStCharsParsedInLine = 0
    , synStPrevChar = '\n'
    , synStCaseSensitive = True
    , synStKeywordCaseSensitive = True
    , synStCaptures = []
    }
-- | Parse one source line: tokens are collected with
-- 'parseExpressionInternal' until 'pEndLine' succeeds.
parseSourceLine = parseExpressionInternal `manyTill` pEndLine
-- | Recognise the end of the current line.
--
-- Succeeds only when the next character is a newline or the input is
-- exhausted (checked with 'lookAhead', so nothing is consumed here), then
-- hands over to 'pHandleEndLine'.
--
-- Every context of this syntax previously had its own branch, but each branch
-- was @return () >> pHandleEndLine@ and the wildcard was 'pHandleEndLine' as
-- well, so the per-context dispatch is collapsed into a single call.
pEndLine = do
  lookAhead $ newline <|> (eof >> return '\n')
  -- Still run currentContext so that this parser fails exactly when the
  -- original did (i.e. whenever currentContext itself fails).
  _ <- currentContext
  pHandleEndLine
-- | Label a piece of matched text with an attribute.
--
-- Fails with @"Parser matched no text"@ when @txt@ is empty. Otherwise
-- returns @(labels, txt)@, where @labels@ is @attr@ followed by its short
-- style code from 'styles' when one exists. As a side effect the parser
-- state is updated: 'synStCharsParsedInLine' grows by the length of @txt@
-- and 'synStPrevChar' becomes the last character of @txt@.
withAttribute attr txt = do
  when (null txt) $ fail "Parser matched no text"
  let labs = attr : maybeToList (lookup attr styles)
  -- txt is non-empty past the guard above, so 'last' is safe here and the
  -- former "empty text" fallback for the previous character was unreachable.
  updateState $ \st ->
    st { synStCharsParsedInLine = synStCharsParsedInLine st + length txt
       , synStPrevChar = last txt
       }
  return (labs, txt)
-- | Association list from attribute name to the short style code that
-- 'withAttribute' appends to a token's labels.
styles =
  [ ("Comment", "co")
  , ("CDATA", "bn")
  , ("Processing Instruction", "kw")
  , ("Doctype", "dt")
  , ("Element", "kw")
  , ("Attribute", "ot")
  , ("Value", "st")
  , ("EntityRef", "dv")
  , ("PEntityRef", "dv")
  , ("Error", "er")
  ]
-- | Parse one token in the current context.
--
-- The rules of the current context are tried first; if they fail, 'pDefault'
-- is used and its text is labeled with the context's entry in
-- 'defaultAttributes' (or the empty string when the context has no entry).
parseExpressionInternal = do
  context <- currentContext
  let fallbackAttr = fromMaybe "" (lookup context defaultAttributes)
  parseRules context <|> (pDefault >>= withAttribute fallbackAttr)
-- Regular expressions used by the parseRules clauses below, each built once
-- with compileRegex. Each identifier appears to be the pattern itself with
-- non-alphanumeric characters written as 'XX hex escapes (e.g. '3c for '<').

-- "<!DOCTYPE" followed by at least one whitespace character.
regex_'3c'21DOCTYPE'5cs'2b = compileRegex "<!DOCTYPE\\s+"
-- "<?" followed by zero or more word characters, ':', '_' or '-'.
regex_'3c'5c'3f'5b'5cw'3a'5f'2d'5d'2a = compileRegex "<\\?[\\w:_-]*"
-- '<' followed by a name whose first character is not a digit.
regex_'3c'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "<(?![0-9])[\\w_:][\\w.:_-]*"
-- '&', then a decimal reference (#123), a hex reference (#x1F) or a name,
-- then ';'.
regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b = compileRegex "&(#[0-9]+|#[xX][0-9A-Fa-f]+|(?![0-9])[\\w_:][\\w.:_-]*);"
-- '%', then a name whose first character is not a digit, then ';'.
regex_'25'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b = compileRegex "%(?![0-9])[\\w_:][\\w.:_-]*;"
-- A '-' followed by one or more further '-' characters, none of which may be
-- immediately followed by "->".
regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b = compileRegex "-(-(?!->))+"
-- "<!" followed by one of ELEMENT, ENTITY, ATTLIST or NOTATION and a word
-- boundary.
regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb = compileRegex "<!(ELEMENT|ENTITY|ATTLIST|NOTATION)\\b"
-- A name whose first character is not a digit.
regex_'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "(?![0-9])[\\w_:][\\w.:_-]*"
-- At least one whitespace character followed by such a name.
regex_'5cs'2b'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "\\s+(?![0-9])[\\w_:][\\w.:_-]*"
-- Any single non-whitespace character.
regex_'5cS = compileRegex "\\S"
-- "</" followed by a name whose first character is not a digit.
regex_'3c'2f'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "</(?![0-9])[\\w_:][\\w.:_-]*"
-- | Association list from context name to the attribute that
-- 'parseExpressionInternal' applies to text matched by 'pDefault' in that
-- context.
defaultAttributes =
  [ ("Start", "Normal Text")
  , ("FindXML", "Normal Text")
  , ("FindEntityRefs", "Other Text")
  , ("FindPEntityRefs", "Other Text")
  , ("Comment", "Comment")
  , ("CDATA", "Other Text")
  , ("PI", "Other Text")
  , ("Doctype", "Other Text")
  , ("Doctype Internal Subset", "Other Text")
  , ("Doctype Markupdecl", "Other Text")
  , ("Doctype Markupdecl DQ", "Value")
  , ("Doctype Markupdecl SQ", "Value")
  , ("Element", "Other Text")
  , ("El Content", "Other Text")
  , ("El End", "Other Text")
  , ("Attribute", "Other Text")
  , ("Value", "Other Text")
  , ("Value DQ", "Value")
  , ("Value SQ", "Value")
  ]
-- Context "Start": delegates entirely to the "FindXML" rules.
parseRules "Start" = parseRules "FindXML"
-- Context "FindXML": top-level XML content. Alternatives are tried in order:
-- whitespace, comment start, CDATA start, DOCTYPE, processing instruction,
-- opening tag, the "FindEntityRefs" rules, and finally a plain identifier.
-- The construct openers also push the matching context.
parseRules "FindXML" =
  (pDetectSpaces >>= withAttribute "Normal Text")
    <|> ((pString False "<!--" >>= withAttribute "Comment") >>~ pushContext "Comment")
    <|> ((pString False "<![CDATA[" >>= withAttribute "CDATA") >>~ pushContext "CDATA")
    <|> ((pRegExpr regex_'3c'21DOCTYPE'5cs'2b >>= withAttribute "Doctype") >>~ pushContext "Doctype")
    <|> ((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'5f'2d'5d'2a >>= withAttribute "Processing Instruction") >>~ pushContext "PI")
    <|> ((pRegExpr regex_'3c'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Element") >>~ pushContext "Element")
    <|> parseRules "FindEntityRefs"
    <|> (pDetectIdentifier >>= withAttribute "Normal Text")
-- Context "FindEntityRefs": a well-formed entity/character reference is
-- labeled "EntityRef"; any other '&' or '<' is labeled "Error".
parseRules "FindEntityRefs" =
  (pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute "EntityRef")
    <|> (pAnyChar "&<" >>= withAttribute "Error")
-- Context "FindPEntityRefs": like "FindEntityRefs", but also recognises
-- parameter-entity references ("%name;") as "PEntityRef"; any other '&' or
-- '%' is labeled "Error".
parseRules "FindPEntityRefs" =
  (pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute "EntityRef")
    <|> (pRegExpr regex_'25'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b >>= withAttribute "PEntityRef")
    <|> (pAnyChar "&%" >>= withAttribute "Error")
-- Context "Comment": inside "<!-- ... -->". The closing "-->" pops the
-- context; a run of dashes matched by the dash regex is labeled "Error";
-- the Alert syntax gets a chance before plain identifiers.
parseRules "Comment" =
  (pDetectSpaces >>= withAttribute "Comment")
    <|> ((pString False "-->" >>= withAttribute "Comment") >>~ popContext)
    <|> (pRegExpr regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b >>= withAttribute "Error")
    <|> Text.Highlighting.Kate.Syntax.Alert.parseExpression
    <|> (pDetectIdentifier >>= withAttribute "Comment")
-- Context "CDATA": inside "<![CDATA[ ... ]]>". Whitespace and identifiers
-- are labeled "Other Text"; the terminator "]]>" is labeled "CDATA" and pops
-- the context.
--
-- The last alternative previously matched "]]>" a second time, which could
-- never fire because the preceding alternative always wins on that input.
-- It now matches the escaped form "]]&gt;" and labels it "EntityRef",
-- which is what the upstream Kate XML definition appears to specify for this
-- rule (TODO confirm against xml.xml).
parseRules "CDATA" =
  (pDetectSpaces >>= withAttribute "Other Text")
    <|> (pDetectIdentifier >>= withAttribute "Other Text")
    <|> ((pString False "]]>" >>= withAttribute "CDATA") >>~ popContext)
    <|> (pString False "]]&gt;" >>= withAttribute "EntityRef")
-- Context "PI": inside a processing instruction; the closing "?>" is labeled
-- "Processing Instruction" and pops the context.
parseRules "PI" =
  (pDetect2Chars False '?' '>' >>= withAttribute "Processing Instruction") >>~ popContext
-- Context "Doctype": '>' ends the declaration (pop); '[' opens the internal
-- subset (push "Doctype Internal Subset").
parseRules "Doctype" =
  ((pDetectChar False '>' >>= withAttribute "Doctype") >>~ popContext)
    <|> ((pDetectChar False '[' >>= withAttribute "Doctype") >>~ pushContext "Doctype Internal Subset")
-- Context "Doctype Internal Subset": ']' pops; markup declarations, comments
-- and processing instructions push their own contexts; otherwise the
-- "FindPEntityRefs" rules apply.
parseRules "Doctype Internal Subset" =
  ((pDetectChar False ']' >>= withAttribute "Doctype") >>~ popContext)
    <|> ((pRegExpr regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb >>= withAttribute "Doctype") >>~ pushContext "Doctype Markupdecl")
    <|> ((pString False "<!--" >>= withAttribute "Comment") >>~ pushContext "Comment")
    <|> ((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'5f'2d'5d'2a >>= withAttribute "Processing Instruction") >>~ pushContext "PI")
    <|> parseRules "FindPEntityRefs"
-- Context "Doctype Markupdecl": '>' pops; a double or single quote opens the
-- corresponding quoted-value context.
parseRules "Doctype Markupdecl" =
  ((pDetectChar False '>' >>= withAttribute "Doctype") >>~ popContext)
    <|> ((pDetectChar False '"' >>= withAttribute "Value") >>~ pushContext "Doctype Markupdecl DQ")
    <|> ((pDetectChar False '\'' >>= withAttribute "Value") >>~ pushContext "Doctype Markupdecl SQ")
-- Context "Doctype Markupdecl DQ": the closing double quote pops; otherwise
-- the "FindPEntityRefs" rules apply.
parseRules "Doctype Markupdecl DQ" =
  ((pDetectChar False '"' >>= withAttribute "Value") >>~ popContext)
    <|> parseRules "FindPEntityRefs"
-- Context "Doctype Markupdecl SQ": the closing single quote pops; otherwise
-- the "FindPEntityRefs" rules apply.
parseRules "Doctype Markupdecl SQ" =
  ((pDetectChar False '\'' >>= withAttribute "Value") >>~ popContext)
    <|> parseRules "FindPEntityRefs"
-- Context "Element": inside an opening tag. "/>" pops; '>' pushes
-- "El Content"; an attribute name (either after pColumn 0 succeeds, or after
-- whitespace) pushes "Attribute"; any other non-whitespace character is
-- labeled "Error".
parseRules "Element" =
  ((pDetect2Chars False '/' '>' >>= withAttribute "Element") >>~ popContext)
    <|> ((pDetectChar False '>' >>= withAttribute "Element") >>~ pushContext "El Content")
    <|> ((pColumn 0 >> pRegExpr regex_'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Attribute") >>~ pushContext "Attribute")
    <|> ((pRegExpr regex_'5cs'2b'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Attribute") >>~ pushContext "Attribute")
    <|> (pRegExpr regex_'5cS >>= withAttribute "Error")
-- Context "El Content": between an opening and a closing tag. A closing-tag
-- start ("</name") pushes "El End"; everything else uses the "FindXML" rules.
parseRules "El Content" =
  ((pRegExpr regex_'3c'2f'28'3f'21'5b0'2d9'5d'29'5b'5cw'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Element") >>~ pushContext "El End")
    <|> parseRules "FindXML"
-- Context "El End": inside a closing tag. '>' pops three contexts; any other
-- non-whitespace character is labeled "Error".
parseRules "El End" =
  ((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext >> popContext >> popContext))
    <|> (pRegExpr regex_'5cS >>= withAttribute "Error")
-- Context "Attribute": after an attribute name. '=' pushes "Value"; any other
-- non-whitespace character is labeled "Error".
parseRules "Attribute" =
  ((pDetectChar False '=' >>= withAttribute "Attribute") >>~ pushContext "Value")
    <|> (pRegExpr regex_'5cS >>= withAttribute "Error")
-- Context "Value": after '='. An opening double or single quote pushes the
-- corresponding quoted-value context; any other non-whitespace character is
-- labeled "Error".
parseRules "Value" =
  ((pDetectChar False '"' >>= withAttribute "Value") >>~ pushContext "Value DQ")
    <|> ((pDetectChar False '\'' >>= withAttribute "Value") >>~ pushContext "Value SQ")
    <|> (pRegExpr regex_'5cS >>= withAttribute "Error")
-- Context "Value DQ": inside a double-quoted attribute value. The closing
-- quote pops three contexts; otherwise the "FindEntityRefs" rules apply.
parseRules "Value DQ" =
  ((pDetectChar False '"' >>= withAttribute "Value") >>~ (popContext >> popContext >> popContext))
    <|> parseRules "FindEntityRefs"
-- Context "Value SQ": inside a single-quoted attribute value. The closing
-- quote pops three contexts; otherwise the "FindEntityRefs" rules apply.
parseRules "Value SQ" =
  ((pDetectChar False '\'' >>= withAttribute "Value") >>~ (popContext >> popContext >> popContext))
    <|> parseRules "FindEntityRefs"
-- An empty context name is treated as "Start".
parseRules "" = parseRules "Start"
-- Any other name is not a context of this syntax: fail the parser. The
-- message previously glued the name directly onto "Unknown context"; it is
-- now separated and quoted so names containing spaces stay readable.
parseRules x = fail $ "Unknown context: " ++ show x