-- | Lexer definition for the Atomo language.
--
-- The module exports a single 'Lexer' record; everything else is the set of
-- lexer states (lists of regex rules) reachable from 'root'.
module Text.Highlighter.Lexers.Atomo (lexer) where

import Data.List (intercalate)
import Text.Printf
import Text.Regex.PCRE.Light

import Text.Highlighter.Types

-- | Lexer record for Atomo: lexing starts in the 'root' state and all
-- patterns are compiled with the @multiline@ flag.
lexer :: Lexer
lexer = Lexer
    { lName = "Atomo"
    , lAliases = ["atomo"]
    , lExtensions = [".atomo"]
    , lMimetypes = ["text/x-atomo"]
    , lStart = root
    , lFlags = [multiline]
    }

-- | Regex fragments for the named ASCII control-character escapes; they are
-- joined with @|@ in the 'escape' state.
ascii :: [String]
ascii =
    [ "NUL", "SOH", "[SE]TX", "EOT", "ENQ", "ACK"
    , "BEL", "BS", "HT", "LF", "VT", "FF", "CR", "S[OI]", "DLE"
    , "DC[1-4]", "NAK", "SYN", "ETB", "CAN"
    , "EM", "SUB", "ESC", "[FGRU]S", "SP", "DEL"
    ]

-- | Words highlighted as reserved keywords by 'root'.
reserved :: [String]
reserved = ["operator", "macro", "for-macro", "this"]

-- | Character class (as a regex fragment) matching one identifier character.
identifier :: String
identifier = "[a-zA-Z0-9_!#%&\\*\\+\\.\\\\/<=>\\?@^\\|~\\-]"

-- | Character class (as a regex fragment) matching one operator character.
operator :: String
operator = "[:!#%&\\*\\+\\.\\\\/<=>\\?@^\\|~\\-]"

-- | Top-level lexer state. Rule order is significant: earlier patterns take
-- precedence over later ones (e.g. the float patterns precede the integer
-- pattern, and reserved words precede plain identifiers).
root :: TokenMatcher
root =
    -- Comments
    [ tok "--.*?$" (Comment :. Single)
    , tokNext "{-" (Comment :. Multiline) (GoTo comment)

    -- Boolean
    , tok "True|False" (Keyword :. Constant)

    -- Numbers
    , tok "[\\+\\-]?\\d+[eE][\\+\\-]?\\d+" (Number :. Float)
    , tok "[\\+\\-]?\\d+\\.\\d+([eE][\\+\\-]?\\d+)?" (Number :. Float)
    , tok "[\\+\\-]?0[oO][0-7]+" (Number :. Oct)
    , tok "[\\+\\-]?0[xX][\\da-fA-F]+" (Number :. Hex)
    , tok "[\\+\\-]?\\d+/[\\+\\-]?\\d+" Number
    , tok "[\\+\\-]?\\d+" (Number :. Integer)

    -- Internal representations (TODO: these should get less ambiguous syntax.)
    , tokNext "<[a-z]" (Generic :. Output) (GoTo internal)

    -- Macro-Quote
    -- The first rule captures the opening delimiter and hands it to
    -- 'macroQuote'; the bracketed forms use a fixed closing delimiter.
    , tokNext
        ("(?![\"$|`;~@])(" ++ identifier ++ "+)([\"$|`'~@])")
        (String :. Other)
        (CapturesTo macroQuote)
    , tokNext
        ("(?![\"$|`;~@])(" ++ identifier ++ "+)\\(")
        (String :. Other)
        (GoTo (macroQuoteDelim "\\)"))
    , tokNext
        ("(?![\"$|`;~@])(" ++ identifier ++ "+)\\{")
        (String :. Other)
        (GoTo (macroQuoteDelim "\\}"))
    , tokNext
        ("(?![\"$|`;~@])(" ++ identifier ++ "+)\\[")
        (String :. Other)
        (GoTo (macroQuoteDelim "\\]"))

    -- Identifiers
    , tok
        (printf "\\b(%s)\\b(?!%s)" (intercalate "|" reserved) operator)
        (Keyword :. Reserved)
    , tok
        ("(?![@$~])(?!" ++ operator ++ "+(\\s|$))" ++ identifier ++ "+:")
        (Name :. Function)
    {-, tok ("[A-Z]" ++ identifier ++ "*") (Name :. Variable :. Global)-}
    , tok
        ("(?![@$~])(?!" ++ operator ++ "+(\\s|$))" ++ identifier ++ "+")
        Name

    -- Operators
    , tok ("(?![@$~])" ++ operator ++ "+") Operator

    -- Whitespace
    , tok "\\s+" Text

    -- Characters & Strings
    , tokNext "\\$" (String :. Char) (GoTo character)
    , tokNext "\"" String (GoTo string)

    -- Quoting
    , tok ("'" ++ identifier ++ "+") (String :. Symbol)
    , tok "'" (String :. Symbol)
    , tok ("`" ++ identifier ++ "+") (String :. Symbol)
    , tok "`" (String :. Symbol)
    , tok ("~" ++ identifier ++ "+") (String :. Interpol)
    , tok "~" (String :. Interpol)

    -- Particles
    , tok ("@(" ++ identifier ++ "+:)+") (Name :. Decorator)
    , tok ("@" ++ identifier ++ "+") (Name :. Decorator)
    , tok "@" (Name :. Decorator)

    -- Punctuation
    , tok "[][(),;{}|]" Punctuation
    ]

-- | State entered after @<@ followed by a lowercase letter. A further @<@
-- pushes the state again and @>@ pops it, so angle brackets nest.
internal :: TokenMatcher
internal =
    [ tok "[^<>]+" (Generic :. Output)
    , tokNext "<" (Generic :. Output) Push
    , tokNext ">" (Generic :. Output) Pop
    ]

-- | State for multiline comments. An opening marker pushes the state again
-- and a closing marker pops it, so comments nest; stray @-@, @{@ and @}@
-- characters are consumed by the last rule.
comment :: TokenMatcher
comment =
    [ tok "[^\\-\\{\\}]+" (Comment :. Multiline)
    , tokNext "{-" (Comment :. Multiline) Push
    , tokNext "-}" (Comment :. Multiline) Pop
    , tok "[-{}]" (Comment :. Multiline)
    ]

-- | State entered after @$@: either one non-backslash character, or a
-- backslash followed by a run of non-whitespace characters.
character :: TokenMatcher
character =
    [ tokNext "[^\\\\]" (String :. Char) Pop
    , tokNext "\\\\[^\\s]+" (String :. Escape) Pop
    ]

-- | State for double-quoted strings; a backslash switches to 'escape' and an
-- unescaped double quote pops the state.
string :: TokenMatcher
string =
    [ tok "[^\\\\\"]+" String
    , tokNext "\\\\" (String :. Escape) (GoTo escape)
    , tokNext "\"" String Pop
    ]

-- | Body of a macro-quote closed by a fixed delimiter.
--
-- @c@ is spliced verbatim into both a character class and a pattern, so it
-- must already be a valid (escaped) regex fragment, as in @"\\\\)"@.
macroQuoteDelim :: String -> TokenMatcher
macroQuoteDelim c =
    [ tok ("[^\\\\" ++ c ++ "]+") (String :. Other)
    , tokNext "\\\\." (String :. Other) Continue
    , tokNext (c ++ "([[:alpha:]]*)") (String :. Other) Pop
    ]

-- | Body of a macro-quote whose closing delimiter is the text captured by the
-- opening rule in 'root'.
--
-- @cs@ is the capture list of that rule; the delimiter is its third element
-- (index 2). The delimiter is backslash-escaped before being spliced into the
-- patterns: the raw capture may be @$@ or @|@, which unescaped would act as an
-- end-of-line anchor or an alternation instead of matching the literal
-- closing character.
macroQuote :: [String] -> TokenMatcher
macroQuote cs =
    [ tok ("[^\\\\" ++ delim ++ "]+") (String :. Other)
    , tokNext "\\\\." (String :. Other) Continue
    , tokNext (delim ++ "([[:alpha:]]*)") (String :. Other) Pop
    ]
  where
    delim = case drop 2 cs of
        (d : _) -> concatMap escapeChar d
        [] ->
            error
                "Text.Highlighter.Lexers.Atomo.macroQuote: \
                \missing delimiter capture"

    -- A backslash before a non-alphanumeric character always yields that
    -- literal character in PCRE; alphanumerics are left alone because a
    -- backslash would turn them into escapes such as \d.
    escapeChar ch
        | ch `elem` alphaNums = [ch]
        | otherwise = ['\\', ch]

    alphaNums = ['a' .. 'z'] ++ ['A' .. 'Z'] ++ ['0' .. '9']

-- | State entered after a backslash inside a string; each rule consumes one
-- escape form and pops the state.
escape :: TokenMatcher
escape =
    [ tokNext "[abfnrtv\"&\\\\]" (String :. Escape) Pop
    , tokNext "\\^[\\]\\[A-Z@\\^_]" (String :. Escape) Pop
    , tokNext (intercalate "|" ascii) (String :. Escape) Pop
    , tokNext "o[0-7]+" (String :. Escape) Pop
    , tokNext "x[\\da-fA-F]+" (String :. Escape) Pop
    , tokNext "\\d+" (String :. Escape) Pop
    , tokNext "\\s+\\\\" (String :. Escape) Pop
    ]