module Text.Highlighting.Kate.Syntax.Html ( highlight, parseExpression, syntaxName, syntaxExtensions ) where
import Text.Highlighting.Kate.Definitions
import Text.Highlighting.Kate.Common
import qualified Text.Highlighting.Kate.Syntax.Alert
import qualified Text.Highlighting.Kate.Syntax.Css
import qualified Text.Highlighting.Kate.Syntax.Javascript
import Text.ParserCombinators.Parsec
import Control.Monad (when)
import Data.Map (fromList)
import Data.Maybe (fromMaybe, maybeToList)
syntaxName :: String
syntaxName = "HTML"
syntaxExtensions :: String
syntaxExtensions = "*.htm;*.html;*.shtml;*.shtm"
highlight :: String -> Either String [SourceLine]
highlight input =
case runParser parseSource startingState "source" input of
Left err -> Left $ show err
Right result -> Right result
parseExpression :: GenParser Char SyntaxState LabeledSource
parseExpression = do
st <- getState
let oldLang = synStLanguage st
setState $ st { synStLanguage = "HTML" }
context <- currentContext <|> (pushContext "Start" >> currentContext)
result <- parseRules context
updateState $ \st -> st { synStLanguage = oldLang }
return result
parseSource = do
lineContents <- lookAhead wholeLine
updateState $ \st -> st { synStCurrentLine = lineContents }
result <- manyTill parseSourceLine eof
return $ map normalizeHighlighting result
startingState = SyntaxState {synStContexts = fromList [("HTML",["Start"])], synStLanguage = "HTML", synStCurrentLine = "", synStCharsParsedInLine = 0, synStPrevChar = '\n', synStCaseSensitive = True, synStKeywordCaseSensitive = True, synStCaptures = []}
parseSourceLine = manyTill parseExpressionInternal pEndLine
pEndLine = do
lookAhead $ newline <|> (eof >> return '\n')
context <- currentContext
case context of
"Start" -> return () >> pHandleEndLine
"FindHTML" -> return () >> pHandleEndLine
"FindEntityRefs" -> return () >> pHandleEndLine
"FindPEntityRefs" -> return () >> pHandleEndLine
"FindAttributes" -> return () >> pHandleEndLine
"FindDTDRules" -> return () >> pHandleEndLine
"Comment" -> return () >> pHandleEndLine
"CDATA" -> return () >> pHandleEndLine
"PI" -> return () >> pHandleEndLine
"Doctype" -> return () >> pHandleEndLine
"Doctype Internal Subset" -> return () >> pHandleEndLine
"Doctype Markupdecl" -> return () >> pHandleEndLine
"Doctype Markupdecl DQ" -> return () >> pHandleEndLine
"Doctype Markupdecl SQ" -> return () >> pHandleEndLine
"El Open" -> return () >> pHandleEndLine
"El Close" -> return () >> pHandleEndLine
"El Close 2" -> return () >> pHandleEndLine
"El Close 3" -> return () >> pHandleEndLine
"CSS" -> return () >> pHandleEndLine
"CSS content" -> return () >> pHandleEndLine
"JS" -> return () >> pHandleEndLine
"JS content" -> return () >> pHandleEndLine
"JS comment close" -> (popContext) >> pEndLine
"Value" -> return () >> pHandleEndLine
"Value NQ" -> (popContext >> popContext) >> pEndLine
"Value DQ" -> return () >> pHandleEndLine
"Value SQ" -> return () >> pHandleEndLine
_ -> pHandleEndLine
withAttribute attr txt = do
when (null txt) $ fail "Parser matched no text"
let labs = attr : maybeToList (lookup attr styles)
st <- getState
let oldCharsParsed = synStCharsParsedInLine st
let prevchar = if null txt then '\n' else last txt
updateState $ \st -> st { synStCharsParsedInLine = oldCharsParsed + length txt, synStPrevChar = prevchar }
return (labs, txt)
styles = [("Comment","co"),("CDATA","bn"),("Processing Instruction","kw"),("Doctype","dt"),("Element","kw"),("Attribute","ot"),("Value","st"),("EntityRef","dv"),("PEntityRef","dv"),("Error","er")]
parseExpressionInternal = do
context <- currentContext
parseRules context <|> (pDefault >>= withAttribute (fromMaybe "" $ lookup context defaultAttributes))
regex_'3c'21DOCTYPE'5cs'2b = compileRegex "<!DOCTYPE\\s+"
regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a = compileRegex "<\\?[\\w:-]*"
regex_'3cstyle'5cb = compileRegex "<style\\b"
regex_'3cscript'5cb = compileRegex "<script\\b"
regex_'3cpre'5cb = compileRegex "<pre\\b"
regex_'3cdiv'5cb = compileRegex "<div\\b"
regex_'3ctable'5cb = compileRegex "<table\\b"
regex_'3cul'5cb = compileRegex "<ul\\b"
regex_'3col'5cb = compileRegex "<ol\\b"
regex_'3cdl'5cb = compileRegex "<dl\\b"
regex_'3c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "<[A-Za-z_:][\\w.:_-]*"
regex_'3c'2fpre'5cb = compileRegex "</pre\\b"
regex_'3c'2fdiv'5cb = compileRegex "</div\\b"
regex_'3c'2ftable'5cb = compileRegex "</table\\b"
regex_'3c'2ful'5cb = compileRegex "</ul\\b"
regex_'3c'2fol'5cb = compileRegex "</ol\\b"
regex_'3c'2fdl'5cb = compileRegex "</dl\\b"
regex_'3c'2f'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "</[A-Za-z_:][\\w.:_-]*"
regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b = compileRegex "&(#[0-9]+|#[xX][0-9A-Fa-f]+|[A-Za-z_:][\\w.:_-]*);"
regex_'25'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b = compileRegex "%[A-Za-z_:][\\w.:_-]*;"
regex_'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "[A-Za-z_:][\\w.:_-]*"
regex_'5cs'2b'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a = compileRegex "\\s+[A-Za-z_:][\\w.:_-]*"
regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb = compileRegex "<!(ELEMENT|ENTITY|ATTLIST|NOTATION)\\b"
regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b = compileRegex "-(-(?!->))+"
regex_'5cS = compileRegex "\\S"
regex_'3c'2fstyle'5cb = compileRegex "</style\\b"
regex_'3c'2fscript'5cb = compileRegex "</script\\b"
regex_'2f'2f'28'3f'3d'2e'2a'3c'2fscript'5cb'29 = compileRegex "//(?=.*</script\\b)"
regex_'2f'28'3f'21'3e'29 = compileRegex "/(?!>)"
regex_'5b'5e'2f'3e'3c'22'27'5cs'5d = compileRegex "[^/><\"'\\s]"
defaultAttributes = [("Start","Normal Text"),("FindHTML","Normal Text"),("FindEntityRefs","Other Text"),("FindPEntityRefs","Other Text"),("FindAttributes","Other Text"),("FindDTDRules","Other Text"),("Comment","Comment"),("CDATA","Other Text"),("PI","Other Text"),("Doctype","Other Text"),("Doctype Internal Subset","Other Text"),("Doctype Markupdecl","Other Text"),("Doctype Markupdecl DQ","Value"),("Doctype Markupdecl SQ","Value"),("El Open","Other Text"),("El Close","Other Text"),("El Close 2","Other Text"),("El Close 3","Other Text"),("CSS","Other Text"),("CSS content","Other Text"),("JS","Other Text"),("JS content","Other Text"),("JS comment close","Comment"),("Value","Other Text"),("Value NQ","Other Text"),("Value DQ","Value"),("Value SQ","Value")]
parseRules "Start" =
do (attr, result) <- ((parseRules "FindHTML"))
return (attr, result)
parseRules "FindHTML" =
do (attr, result) <- (((pDetectSpaces >>= withAttribute "Normal Text"))
<|>
((pDetectIdentifier >>= withAttribute "Normal Text"))
<|>
((pString False "<!--" >>= withAttribute "Comment") >>~ pushContext "Comment")
<|>
((pString False "<![CDATA[" >>= withAttribute "CDATA") >>~ pushContext "CDATA")
<|>
((pRegExpr regex_'3c'21DOCTYPE'5cs'2b >>= withAttribute "Doctype") >>~ pushContext "Doctype")
<|>
((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a >>= withAttribute "Processing Instruction") >>~ pushContext "PI")
<|>
((pRegExpr regex_'3cstyle'5cb >>= withAttribute "Element") >>~ pushContext "CSS")
<|>
((pRegExpr regex_'3cscript'5cb >>= withAttribute "Element") >>~ pushContext "JS")
<|>
((pRegExpr regex_'3cpre'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
<|>
((pRegExpr regex_'3cdiv'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
<|>
((pRegExpr regex_'3ctable'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
<|>
((pRegExpr regex_'3cul'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
<|>
((pRegExpr regex_'3col'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
<|>
((pRegExpr regex_'3cdl'5cb >>= withAttribute "Element") >>~ pushContext "El Open")
<|>
((pRegExpr regex_'3c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Element") >>~ pushContext "El Open")
<|>
((pRegExpr regex_'3c'2fpre'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
<|>
((pRegExpr regex_'3c'2fdiv'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
<|>
((pRegExpr regex_'3c'2ftable'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
<|>
((pRegExpr regex_'3c'2ful'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
<|>
((pRegExpr regex_'3c'2fol'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
<|>
((pRegExpr regex_'3c'2fdl'5cb >>= withAttribute "Element") >>~ pushContext "El Close")
<|>
((pRegExpr regex_'3c'2f'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Element") >>~ pushContext "El Close")
<|>
((parseRules "FindDTDRules"))
<|>
((parseRules "FindEntityRefs")))
return (attr, result)
parseRules "FindEntityRefs" =
do (attr, result) <- (((pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute "EntityRef"))
<|>
((pAnyChar "&<" >>= withAttribute "Error")))
return (attr, result)
parseRules "FindPEntityRefs" =
do (attr, result) <- (((pRegExpr regex_'26'28'23'5b0'2d9'5d'2b'7c'23'5bxX'5d'5b0'2d9A'2dFa'2df'5d'2b'7c'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'29'3b >>= withAttribute "EntityRef"))
<|>
((pRegExpr regex_'25'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a'3b >>= withAttribute "PEntityRef"))
<|>
((pAnyChar "&%" >>= withAttribute "Error")))
return (attr, result)
parseRules "FindAttributes" =
do (attr, result) <- (((pColumn 0 >> pRegExpr regex_'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Attribute"))
<|>
((pRegExpr regex_'5cs'2b'5bA'2dZa'2dz'5f'3a'5d'5b'5cw'2e'3a'5f'2d'5d'2a >>= withAttribute "Attribute"))
<|>
((pDetectChar False '=' >>= withAttribute "Attribute") >>~ pushContext "Value"))
return (attr, result)
parseRules "FindDTDRules" =
do (attr, result) <- ((pRegExpr regex_'3c'21'28ELEMENT'7cENTITY'7cATTLIST'7cNOTATION'29'5cb >>= withAttribute "Doctype") >>~ pushContext "Doctype Markupdecl")
return (attr, result)
parseRules "Comment" =
do (attr, result) <- (((pDetectSpaces >>= withAttribute "Comment"))
<|>
((Text.Highlighting.Kate.Syntax.Alert.parseExpression))
<|>
((pDetectIdentifier >>= withAttribute "Comment"))
<|>
((pString False "-->" >>= withAttribute "Comment") >>~ (popContext))
<|>
((pRegExpr regex_'2d'28'2d'28'3f'21'2d'3e'29'29'2b >>= withAttribute "Error")))
return (attr, result)
parseRules "CDATA" =
do (attr, result) <- (((pDetectSpaces >>= withAttribute "Other Text"))
<|>
((pDetectIdentifier >>= withAttribute "Other Text"))
<|>
((pString False "]]>" >>= withAttribute "CDATA") >>~ (popContext))
<|>
((pString False "]]>" >>= withAttribute "EntityRef")))
return (attr, result)
parseRules "PI" =
do (attr, result) <- ((pDetect2Chars False '?' '>' >>= withAttribute "Processing Instruction") >>~ (popContext))
return (attr, result)
parseRules "Doctype" =
do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Doctype") >>~ (popContext))
<|>
((pDetectChar False '[' >>= withAttribute "Doctype") >>~ pushContext "Doctype Internal Subset"))
return (attr, result)
parseRules "Doctype Internal Subset" =
do (attr, result) <- (((pDetectChar False ']' >>= withAttribute "Doctype") >>~ (popContext))
<|>
((parseRules "FindDTDRules"))
<|>
((pString False "<!--" >>= withAttribute "Comment") >>~ pushContext "Comment")
<|>
((pRegExpr regex_'3c'5c'3f'5b'5cw'3a'2d'5d'2a >>= withAttribute "Processing Instruction") >>~ pushContext "PI")
<|>
((parseRules "FindPEntityRefs")))
return (attr, result)
parseRules "Doctype Markupdecl" =
do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Doctype") >>~ (popContext))
<|>
((pDetectChar False '"' >>= withAttribute "Value") >>~ pushContext "Doctype Markupdecl DQ")
<|>
((pDetectChar False '\'' >>= withAttribute "Value") >>~ pushContext "Doctype Markupdecl SQ"))
return (attr, result)
parseRules "Doctype Markupdecl DQ" =
do (attr, result) <- (((pDetectChar False '"' >>= withAttribute "Value") >>~ (popContext))
<|>
((parseRules "FindPEntityRefs")))
return (attr, result)
parseRules "Doctype Markupdecl SQ" =
do (attr, result) <- (((pDetectChar False '\'' >>= withAttribute "Value") >>~ (popContext))
<|>
((parseRules "FindPEntityRefs")))
return (attr, result)
parseRules "El Open" =
do (attr, result) <- (((pDetect2Chars False '/' '>' >>= withAttribute "Element") >>~ (popContext))
<|>
((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext))
<|>
((parseRules "FindAttributes"))
<|>
((pRegExpr regex_'5cS >>= withAttribute "Error")))
return (attr, result)
parseRules "El Close" =
do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext))
<|>
((pRegExpr regex_'5cS >>= withAttribute "Error")))
return (attr, result)
parseRules "El Close 2" =
do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext >> popContext >> popContext))
<|>
((pRegExpr regex_'5cS >>= withAttribute "Error")))
return (attr, result)
parseRules "El Close 3" =
do (attr, result) <- (((pDetectChar False '>' >>= withAttribute "Element") >>~ (popContext >> popContext >> popContext >> popContext))
<|>
((pRegExpr regex_'5cS >>= withAttribute "Error")))
return (attr, result)
parseRules "CSS" =
do (attr, result) <- (((pDetect2Chars False '/' '>' >>= withAttribute "Element") >>~ (popContext))
<|>
((pDetectChar False '>' >>= withAttribute "Element") >>~ pushContext "CSS content")
<|>
((parseRules "FindAttributes"))
<|>
((pRegExpr regex_'5cS >>= withAttribute "Error")))
return (attr, result)
parseRules "CSS content" =
do (attr, result) <- (((pRegExpr regex_'3c'2fstyle'5cb >>= withAttribute "Element") >>~ pushContext "El Close 2")
<|>
((Text.Highlighting.Kate.Syntax.Css.parseExpression)))
return (attr, result)
parseRules "JS" =
do (attr, result) <- (((pDetect2Chars False '/' '>' >>= withAttribute "Element") >>~ (popContext))
<|>
((pDetectChar False '>' >>= withAttribute "Element") >>~ pushContext "JS content")
<|>
((parseRules "FindAttributes"))
<|>
((pRegExpr regex_'5cS >>= withAttribute "Error")))
return (attr, result)
parseRules "JS content" =
do (attr, result) <- (((pRegExpr regex_'3c'2fscript'5cb >>= withAttribute "Element") >>~ pushContext "El Close 2")
<|>
((pRegExpr regex_'2f'2f'28'3f'3d'2e'2a'3c'2fscript'5cb'29 >>= withAttribute "Comment") >>~ pushContext "JS comment close")
<|>
((Text.Highlighting.Kate.Syntax.Javascript.parseExpression)))
return (attr, result)
parseRules "JS comment close" =
do (attr, result) <- (((pRegExpr regex_'3c'2fscript'5cb >>= withAttribute "Element") >>~ pushContext "El Close 3")
<|>
((Text.Highlighting.Kate.Syntax.Alert.parseExpression)))
return (attr, result)
parseRules "Value" =
do (attr, result) <- (((pDetectChar False '"' >>= withAttribute "Value") >>~ pushContext "Value DQ")
<|>
((pDetectChar False '\'' >>= withAttribute "Value") >>~ pushContext "Value SQ")
<|>
((pDetectSpaces >>= withAttribute "Other Text"))
<|>
(pushContext "Value NQ" >> return ([], "")))
return (attr, result)
parseRules "Value NQ" =
do (attr, result) <- (((parseRules "FindEntityRefs"))
<|>
((pRegExpr regex_'2f'28'3f'21'3e'29 >>= withAttribute "Value"))
<|>
((pRegExpr regex_'5b'5e'2f'3e'3c'22'27'5cs'5d >>= withAttribute "Value"))
<|>
((popContext >> popContext) >> return ([], "")))
return (attr, result)
parseRules "Value DQ" =
do (attr, result) <- (((pDetectChar False '"' >>= withAttribute "Value") >>~ (popContext >> popContext))
<|>
((parseRules "FindEntityRefs")))
return (attr, result)
parseRules "Value SQ" =
do (attr, result) <- (((pDetectChar False '\'' >>= withAttribute "Value") >>~ (popContext >> popContext))
<|>
((parseRules "FindEntityRefs")))
return (attr, result)
parseRules x = fail $ "Unknown context" ++ x