{-# LANGUAGE Arrows #-} module Skylighting.Parser ( parseSyntaxDefinition , addSyntaxDefinition , missingIncludes ) where import Data.ByteString.UTF8 (fromString) import Data.Char (isAlphaNum, toUpper) import Data.List (nub) import qualified Data.Map as Map import Data.Maybe (fromMaybe) import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as Text import Safe import Skylighting.Regex import Skylighting.Types import System.FilePath import Text.XML.HXT.Core -- | Adds a syntax definition to a syntax map, -- replacing any existing definition with the same name. addSyntaxDefinition :: Syntax -> SyntaxMap -> SyntaxMap addSyntaxDefinition s = Map.insert (sName s) s -- | Scan a list of 'Syntax's and make sure that -- `IncludeRules` never asks for a syntax not in this -- list. Produces a list of pairs where the first -- element is the including syntax name and the second -- element is the (missing) included syntax name. -- This is intended for sanity checks to avoid run-time -- errors. missingIncludes :: [Syntax] -> [(Text, Text)] missingIncludes syns = nub [(sName s, lang) | s <- syns , c <- Map.elems (sContexts s) , IncludeRules (lang, _) <- map rMatcher (cRules c) , lang `notElem` (map sName syns)] standardDelims :: Set.Set Char standardDelims = Set.fromList " \n\t.():!+,-<=>%&*/;?[]^{|}~\\" defaultKeywordAttr :: KeywordAttr defaultKeywordAttr = KeywordAttr { keywordCaseSensitive = True , keywordDelims = standardDelims } stripWhitespace :: String -> String stripWhitespace = reverse . stripWhitespaceLeft . reverse . stripWhitespaceLeft where stripWhitespaceLeft = dropWhile isWhitespace isWhitespace x = x `elem` [' ', '\t', '\n'] vBool :: Bool -> String -> Bool vBool defaultVal value = case value of z | z `elem` ["true","yes","1"] -> True z | z `elem` ["false","no","0"] -> False _ -> defaultVal -- | Parses a file containing a Kate XML syntax definition -- into a 'Syntax' description. parseSyntaxDefinition :: String -> IO (Either String Syntax) parseSyntaxDefinition xml = do res <- runX (application xml) case res of [s] -> return $ Right s _ -> return $ Left $ "Could not parse syntax definition " ++ xml application :: String -> IOSArrow b Syntax application fp = readDocument [withValidate no, withInputEncoding utf8] fp >>> multi (hasName "language") >>> extractSyntaxDefinition (takeFileName fp) extractSyntaxDefinition :: String -> IOSArrow XmlTree Syntax extractSyntaxDefinition filename = proc x -> do lang <- getAttrValue "name" -< x author <- getAttrValue "author" -< x version <- getAttrValue "version" -< x license <- getAttrValue "license" -< x extensions <- getAttrValue "extensions" -< x contexts <- getContexts $< (arr (vBool True) <<< getAttrValue "casesensitive") &&& (getAttrValue "name") &&& (arr toItemDataTable <<< getItemDatas) &&& getLists &&& (arr (headDef defaultKeywordAttr) <<< getKeywordAttrs) -< x startingContext <- case contexts of (c:_) -> returnA -< cName c [] -> issueErr "No contexts" >>> none -< () returnA -< Syntax{ sName = Text.pack lang , sFilename = filename , sShortname = Text.pack $ pathToLangName filename , sAuthor = Text.pack $ author , sVersion = Text.pack $ version , sLicense = Text.pack $ license , sExtensions = words $ map (\c -> if c == ';' then ' ' else c) extensions , sContexts = Map.fromList [(cName c, c) | c <- contexts] , sStartingContext = startingContext } toItemDataTable :: [(String,String)] -> Map.Map String TokenType toItemDataTable = Map.fromList . map (\(s,t) -> (s, toTokenType t)) getItemDatas :: IOSArrow XmlTree [(String,String)] getItemDatas = multi (hasName "itemDatas") >>> (listA $ getChildren >>> hasName "itemData" >>> getAttrValue "name" &&& getAttrValue "defStyleNum") toTokenType :: String -> TokenType toTokenType s = case s of "dsNormal" -> NormalTok "dsKeyword" -> KeywordTok "dsDataType" -> DataTypeTok "dsDecVal" -> DecValTok "dsBaseN" -> BaseNTok "dsFloat" -> FloatTok "dsConstant" -> ConstantTok "dsChar" -> CharTok "dsSpecialChar" -> SpecialCharTok "dsString" -> StringTok "dsVerbatimString" -> VerbatimStringTok "dsSpecialString" -> SpecialStringTok "dsImport" -> ImportTok "dsComment" -> CommentTok "dsDocumentation" -> DocumentationTok "dsAnnotation" -> AnnotationTok "dsCommentVar" -> CommentVarTok "dsOthers" -> OtherTok "dsFunction" -> FunctionTok "dsVariable" -> VariableTok "dsControlFlow" -> ControlFlowTok "dsOperator" -> OperatorTok "dsBuiltIn" -> BuiltInTok "dsExtension" -> ExtensionTok "dsPreprocessor" -> PreprocessorTok "dsAttribute" -> AttributeTok "dsRegionMarker" -> RegionMarkerTok "dsInformation" -> InformationTok "dsWarning" -> WarningTok "dsAlert" -> AlertTok "dsError" -> ErrorTok _ -> NormalTok getLists :: IOSArrow XmlTree [(String, [String])] getLists = listA $ multi (hasName "list") >>> getAttrValue "name" &&& getListContents getListContents :: IOSArrow XmlTree [String] getListContents = listA $ getChildren >>> hasName "item" >>> getChildren >>> getText >>> arr stripWhitespace getContexts :: (Bool, (String, (Map.Map String TokenType, ([(String, [String])], KeywordAttr)))) -> IOSArrow XmlTree [Context] getContexts (casesensitive, (syntaxname, (itemdatas, (lists, kwattr)))) = listA $ multi (hasName "context") >>> proc x -> do name <- getAttrValue "name" -< x attribute <- getAttrValue "attribute" -< x lineEmptyContext <- getAttrValue "lineEmptyContext" -< x lineEndContext <- getAttrValue "lineEndContext" -< x lineBeginContext <- getAttrValue "lineBeginContext" -< x fallthrough <- arr (vBool False) <<< getAttrValue "fallthrough" -< x fallthroughContext <- getAttrValue "fallthroughContext" -< x dynamic <- arr (vBool False) <<< getAttrValue "dynamic" -< x parsers <- getParsers (casesensitive, (syntaxname, (itemdatas, (lists, kwattr)))) $< getAttrValue "attribute" -< x returnA -< Context { cName = Text.pack name , cSyntax = Text.pack syntaxname , cRules = parsers , cAttribute = fromMaybe NormalTok $ Map.lookup attribute itemdatas , cLineEmptyContext = parseContextSwitch syntaxname lineEmptyContext , cLineEndContext = parseContextSwitch syntaxname lineEndContext , cLineBeginContext = parseContextSwitch syntaxname lineBeginContext , cFallthrough = fallthrough , cFallthroughContext = parseContextSwitch syntaxname fallthroughContext , cDynamic = dynamic } -- Note, some xml files have "\\" for a backslash, -- others have "\". Not sure what the rules are, but -- this covers both bases: readChar :: String -> Char readChar s = case s of [c] -> c _ -> readDef '\xffff' $ "'" ++ s ++ "'" getParsers :: (Bool, (String, (Map.Map String TokenType, ([(String, [String])], KeywordAttr)))) -> String -- context attribute -> IOSArrow XmlTree [Rule] getParsers (casesensitive, (syntaxname, (itemdatas, (lists, kwattr)))) cattr = listA $ getChildren >>> proc x -> do name <- getName -< x attribute <- getAttrValue "attribute" -< x context <- getAttrValue "context" -< x char0 <- arr readChar <<< getAttrValue "char" -< x char1 <- arr readChar <<< getAttrValue "char1" -< x str' <- getAttrValue "String" -< x insensitive <- arr (vBool (not casesensitive)) <<< getAttrValue "insensitive" -< x includeAttrib <- arr (vBool False) <<< getAttrValue "includeAttrib" -< x lookahead <- arr (vBool False) <<< getAttrValue "lookAhead" -< x firstNonSpace <- arr (vBool False) <<< getAttrValue "firstNonSpace" -< x column' <- getAttrValue "column" -< x dynamic <- arr (vBool False) <<< getAttrValue "dynamic" -< x children <- getParsers (casesensitive, (syntaxname, (itemdatas, (lists, kwattr)))) cattr -< x let tildeRegex = name == "RegExpr" && take 1 str' == "^" let str = if tildeRegex then drop 1 str' else str' let column = if tildeRegex then Just (0 :: Int) else readMay column' let compiledRe = if dynamic then Nothing else Just $ compileRegex True (fromString str) let re = RegExpr RE{ reString = fromString $ convertOctalEscapes str , reCompiled = compiledRe , reCaseSensitive = not insensitive } let (incsyntax, inccontext) = case break (=='#') context of (cont, '#':'#':lang) -> (lang, cont) _ -> (syntaxname, context) let mbmatcher = case name of "DetectChar" -> Just $ DetectChar char0 "Detect2Chars" -> Just $ Detect2Chars char0 char1 "AnyChar" -> Just $ AnyChar str "RangeDetect" -> Just $ RangeDetect char0 char1 "StringDetect" -> Just $ StringDetect $ Text.pack str "WordDetect" -> Just $ WordDetect $ Text.pack str "RegExpr" -> Just $ re "keyword" -> Just $ Keyword kwattr $ maybe (makeWordSet True []) (makeWordSet (keywordCaseSensitive kwattr) . map Text.pack) (lookup str lists) "Int" -> Just $ Int "Float" -> Just $ Float "HlCOct" -> Just $ HlCOct "HlCHex" -> Just $ HlCHex "HlCStringChar" -> Just $ HlCStringChar "HlCChar" -> Just $ HlCChar "LineContinue" -> Just $ LineContinue "IncludeRules" -> Just $ IncludeRules (Text.pack incsyntax, Text.pack inccontext) "DetectSpaces" -> Just $ DetectSpaces "DetectIdentifier" -> Just $ DetectIdentifier _ -> Nothing matcher <- case mbmatcher of Nothing -> none <<< applyA (arr issueWarn) <<< arr ("Unknown element " ++) <<< getName -< x Just m -> returnA -< m let contextSwitch = if name == "IncludeRules" then [] -- is this right? else parseContextSwitch syntaxname context returnA -< Rule{ rMatcher = matcher, rAttribute = fromMaybe NormalTok $ if null attribute then Map.lookup cattr itemdatas else Map.lookup attribute itemdatas, rIncludeAttribute = includeAttrib, rDynamic = dynamic, rCaseSensitive = not insensitive, rChildren = children, rContextSwitch = contextSwitch, rLookahead = lookahead, rFirstNonspace = firstNonSpace , rColumn = column } parseContextSwitch :: String -> String -> [ContextSwitch] parseContextSwitch _ [] = [] parseContextSwitch _ "#stay" = [] parseContextSwitch syntaxname ('#':'p':'o':'p':xs) = Pop : parseContextSwitch syntaxname xs parseContextSwitch syntaxname ('!':xs) = [Push (Text.pack syntaxname, Text.pack xs)] parseContextSwitch syntaxname xs = [Push (Text.pack syntaxname, Text.pack xs)] getKeywordAttrs :: IOSArrow XmlTree [KeywordAttr] getKeywordAttrs = listA $ multi $ hasName "keywords" >>> proc x -> do caseSensitive <- arr (vBool True) <<< getAttrValue "casesensitive" -< x weakDelim <- getAttrValue "weakDeliminator" -< x additionalDelim <- getAttrValue "additionalDeliminator" -< x returnA -< KeywordAttr { keywordCaseSensitive = caseSensitive , keywordDelims = (Set.union standardDelims (Set.fromList additionalDelim)) Set.\\ Set.fromList weakDelim } pathToLangName :: String -> String pathToLangName s = capitalize (camelize (takeBaseName s)) camelize :: String -> String camelize (d:c:cs) | not (isAlphaNum d) = toUpper c : camelize cs camelize (c:cs) = c : camelize cs camelize [] = [] capitalize :: String -> String capitalize (c:cs) = toUpper c : cs capitalize [] = []