{- This module was generated from data in the Kate syntax highlighting file ocaml.xml, version 1.04, by Glyn Webster (glyn@wave.co.nz) -} module Text.Highlighting.Kate.Syntax.Ocaml ( highlight, parseExpression, syntaxName, syntaxExtensions ) where import Text.Highlighting.Kate.Definitions import Text.Highlighting.Kate.Common import Text.ParserCombinators.Parsec import Control.Monad (when) import Data.Map (fromList) import Data.Maybe (fromMaybe, maybeToList) import qualified Data.Set as Set -- | Full name of language. syntaxName :: String syntaxName = "Objective Caml" -- | Filename extensions for this language. syntaxExtensions :: String syntaxExtensions = "*.ml;*.mli" -- | Highlight source code using this syntax definition. highlight :: String -> Either String [SourceLine] highlight input = case runParser parseSource startingState "source" input of Left err -> Left $ show err Right result -> Right result -- | Parse an expression using appropriate local context. parseExpression :: GenParser Char SyntaxState LabeledSource parseExpression = do st <- getState let oldLang = synStLanguage st setState $ st { synStLanguage = "Objective Caml" } context <- currentContext <|> (pushContext "Normal" >> currentContext) result <- parseRules context updateState $ \st -> st { synStLanguage = oldLang } return result parseSource = do lineContents <- lookAhead wholeLine updateState $ \st -> st { synStCurrentLine = lineContents } result <- manyTill parseSourceLine eof return $ map normalizeHighlighting result startingState = SyntaxState {synStContexts = fromList [("Objective Caml",["Normal"])], synStLanguage = "Objective Caml", synStCurrentLine = "", synStCharsParsedInLine = 0, synStPrevChar = '\n', synStCaseSensitive = True, synStKeywordCaseSensitive = True, synStCaptures = []} parseSourceLine = manyTill parseExpressionInternal pEndLine pEndLine = do newline <|> (eof >> return '\n') context <- currentContext case context of "Normal" -> return () "Multiline Comment" -> return () "String Constant" -> return () "Camlp4 Quotation Constant" -> return () _ -> return () lineContents <- lookAhead wholeLine updateState $ \st -> st { synStCurrentLine = lineContents, synStCharsParsedInLine = 0, synStPrevChar = '\n' } withAttribute attr txt = do when (null txt) $ fail "Parser matched no text" let labs = attr : maybeToList (lookup attr styles) st <- getState let oldCharsParsed = synStCharsParsedInLine st let prevchar = if null txt then '\n' else last txt updateState $ \st -> st { synStCharsParsedInLine = oldCharsParsed + length txt, synStPrevChar = prevchar } return (labs, txt) styles = [("Keyword","kw"),("Core Data Type","dt"),("Decimal","dv"),("Hexadecimal","bn"),("Octal","bn"),("Binary","bn"),("Float","fl"),("Character","ch"),("String","st"),("Escaped characters","ch"),("Comment","co"),("Camlp4 Quotation","st"),("Directive","ot")] parseExpressionInternal = do context <- currentContext parseRules context <|> (pDefault >>= withAttribute (fromMaybe "" $ lookup context defaultAttributes)) list_revised_syntax_keywords = Set.fromList $ words $ "declare value where" list_keywords = Set.fromList $ words $ "and as assert asr begin class closed constraint do done downto else end exception external false for fun function functor if in include inherit land lazy let lor lsl lsr lxor match method mod module mutable new of open or parser private rec sig struct then to true try type val virtual when while with" list_core_types = Set.fromList $ words $ "exn lazy_t format unit int real char string ref array bool list option" regex_'23'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'2e'2a'24 = compileRegex "#[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*.*$" regex_'27'28'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29'7c'5b'5e'27'5d'29'27 = compileRegex "'((\\\\[ntbr'\"\\\\]|\\\\[0-9]{3}|\\\\x[0-9A-Fa-f]{2})|[^'])'" regex_'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c = compileRegex "<:[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*<" regex_'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a = compileRegex "[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*" regex_'2d'3f0'5bxX'5d'5b0'2d9A'2dFa'2df'5f'5d'2b = compileRegex "-?0[xX][0-9A-Fa-f_]+" regex_'2d'3f0'5boO'5d'5b0'2d7'5f'5d'2b = compileRegex "-?0[oO][0-7_]+" regex_'2d'3f0'5bbB'5d'5b01'5f'5d'2b = compileRegex "-?0[bB][01_]+" regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'28'5c'2e'5b0'2d9'5d'5b0'2d9'5f'5d'2a'28'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'3f'7c'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29 = compileRegex "-?[0-9][0-9_]*(\\.[0-9][0-9_]*([eE][-+]?[0-9][0-9_]*)?|[eE][-+]?[0-9][0-9_]*)" regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a = compileRegex "-?[0-9][0-9_]*" regex_'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29 = compileRegex "(\\\\[ntbr'\"\\\\]|\\\\[0-9]{3}|\\\\x[0-9A-Fa-f]{2})" regex_'5c'5c'24 = compileRegex "\\\\$" regex_'5c'5c'28'5c'5c'7c'3e'3e'7c'3c'3c'29 = compileRegex "\\\\(\\\\|>>|<<)" regex_'5c'5c'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c = compileRegex "\\\\<:[A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\0377_][A-Za-z\\0300-\\0326\\0330-\\0366\\0370-\\03770-9_']*<" defaultAttributes = [("Normal","Normal Text"),("Multiline Comment","Comment"),("String Constant","String"),("Camlp4 Quotation Constant","Camlp4 Quotation")] parseRules "Normal" = do (attr, result) <- (((pDetect2Chars False '(' '*' >>= withAttribute "Comment") >>~ pushContext "Multiline Comment") <|> ((pFirstNonSpace >> pRegExpr regex_'23'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'2e'2a'24 >>= withAttribute "Directive")) <|> ((pDetectChar False '"' >>= withAttribute "String") >>~ pushContext "String Constant") <|> ((pRegExpr regex_'27'28'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29'7c'5b'5e'27'5d'29'27 >>= withAttribute "Character")) <|> ((pDetect2Chars False '<' '<' >>= withAttribute "Camlp4 Quotation") >>~ pushContext "Camlp4 Quotation Constant") <|> ((pRegExpr regex_'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c >>= withAttribute "Camlp4 Quotation") >>~ pushContext "Camlp4 Quotation Constant") <|> ((pKeyword " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" list_keywords >>= withAttribute "Keyword")) <|> ((pKeyword " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" list_revised_syntax_keywords >>= withAttribute "Revised Syntax Keyword")) <|> ((pKeyword " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" list_core_types >>= withAttribute "Core Data Type")) <|> ((pRegExpr regex_'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a >>= withAttribute "Identifier")) <|> ((pRegExpr regex_'2d'3f0'5bxX'5d'5b0'2d9A'2dFa'2df'5f'5d'2b >>= withAttribute "Hexadecimal")) <|> ((pRegExpr regex_'2d'3f0'5boO'5d'5b0'2d7'5f'5d'2b >>= withAttribute "Octal")) <|> ((pRegExpr regex_'2d'3f0'5bbB'5d'5b01'5f'5d'2b >>= withAttribute "Binary")) <|> ((pRegExpr regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'28'5c'2e'5b0'2d9'5d'5b0'2d9'5f'5d'2a'28'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29'3f'7c'5beE'5d'5b'2d'2b'5d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a'29 >>= withAttribute "Float")) <|> ((pRegExpr regex_'2d'3f'5b0'2d9'5d'5b0'2d9'5f'5d'2a >>= withAttribute "Decimal"))) return (attr, result) parseRules "Multiline Comment" = do (attr, result) <- (((pDetect2Chars False '*' ')' >>= withAttribute "Comment") >>~ (popContext >> return ())) <|> ((pDetect2Chars False '(' '*' >>= withAttribute "Comment") >>~ pushContext "Multiline Comment")) return (attr, result) parseRules "String Constant" = do (attr, result) <- (((pDetectChar False '"' >>= withAttribute "String") >>~ (popContext >> return ())) <|> ((pRegExpr regex_'28'5c'5c'5bntbr'27'22'5c'5c'5d'7c'5c'5c'5b0'2d9'5d'7b3'7d'7c'5c'5cx'5b0'2d9A'2dFa'2df'5d'7b2'7d'29 >>= withAttribute "Escaped characters")) <|> ((pRegExpr regex_'5c'5c'24 >>= withAttribute "Escaped characters"))) return (attr, result) parseRules "Camlp4 Quotation Constant" = do (attr, result) <- (((pDetect2Chars False '>' '>' >>= withAttribute "Camlp4 Quotation") >>~ (popContext >> return ())) <|> ((pDetect2Chars False '<' '<' >>= withAttribute "Camlp4 Quotation") >>~ pushContext "Camlp4 Quotation Constant") <|> ((pRegExpr regex_'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c >>= withAttribute "Camlp4 Quotation") >>~ pushContext "Camlp4 Quotation Constant") <|> ((pRegExpr regex_'5c'5c'28'5c'5c'7c'3e'3e'7c'3c'3c'29 >>= withAttribute "Escaped characters")) <|> ((pRegExpr regex_'5c'5c'3c'3a'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c0377'5f'5d'5bA'2dZa'2dz'5c0300'2d'5c0326'5c0330'2d'5c0366'5c0370'2d'5c03770'2d9'5f'27'5d'2a'3c >>= withAttribute "Escaped characters"))) return (attr, result) parseRules x = fail $ "Unknown context" ++ x