module Text.Highlighter.Lexers.Atomo (lexer) where

import Data.List (intercalate)
import Text.Printf

import Text.Highlighter.Types


-- | Lexer description for the Atomo language: display name, lookup aliases,
-- file extensions, MIME types, and the state ('root') in which lexing starts.
-- No regex flags are supplied.
lexer :: Lexer
lexer =
    Lexer
        { lName       = "Atomo"
        , lAliases    = ["atomo"]
        , lExtensions = [".atomo"]
        , lMimetypes  = ["text/x-atomo"]
        , lStart      = root
        , lFlags      = []
        }

-- | Regex fragments for the ASCII control-code mnemonics. The 'escape' state
-- joins them with @|@ into a single alternation, so the element order is kept
-- exactly as written (e.g. @SOH@ stays ahead of @S[OI]@).
ascii :: [String]
ascii = concat
    [ ["NUL", "SOH", "[SE]TX", "EOT", "ENQ", "ACK"]
    , ["BEL", "BS", "HT", "LF", "VT", "FF", "CR", "S[OI]", "DLE"]
    , ["DC[1-4]", "NAK", "SYN", "ETB", "CAN"]
    , ["EM", "SUB", "ESC", "[FGRU]S", "SP", "DEL"]
    ]

-- | Reserved words of the language; 'root' joins them into a regex
-- alternation for its reserved-keyword rule.
reserved :: [String]
reserved = words "operator macro for-macro this"

-- | Symbol characters shared by 'identifier' and 'operator', already escaped
-- for use inside a regex character class (without the surrounding brackets).
operatorChars :: String
operatorChars = ":!#%&\\*\\+\\.\\\\/<=>\\?@^\\|~\\-"

-- | Regex character class matching a single identifier character:
-- ASCII letters, digits, underscore, or any operator symbol.
identifier :: String
identifier = "[a-zA-Z0-9_" ++ operatorChars ++ "]"

-- | Regex character class matching a single operator symbol.
operator :: String
operator = "[" ++ operatorChars ++ "]"

-- | Start state of the lexer. Rules are listed in priority order: the first
-- rule in the list that matches is presumably the one applied, so more
-- specific rules must come before more general ones.
root :: TokenMatcher
root =
    -- Whitespace and comments
    [ tok "\\s+" Text
    -- The line comment is matched with a negated character class rather than
    -- @.*?$@: this lexer passes no regex flags ('lFlags' is empty), and without
    -- a multiline flag @$@ only matches at the very end of the input, so the
    -- old pattern could fail on every line but the last.
    , tok "--[^\\n]*" (Comment :. Single)
    , tokNext "{-" (Comment :. Multiline) (GoTo comment)

    -- Booleans. These must precede the capitalised-name rule below, which
    -- would otherwise always claim @True@ and @False@ first. The lookahead
    -- keeps longer names such as @Truthy@ or @True-ish@ out of this rule.
    , tok ("(True|False)(?!" ++ identifier ++ ")") (Keyword :. Constant)

    -- Identifiers
    , tok (printf "\\b(%s)\\b(?!%s)" (intercalate "|" reserved) operator)
        (Keyword :. Reserved)
    -- A lower-case name ending in a colon is highlighted as a function name.
    , tok ("[_a-z]" ++ identifier ++ "*:") (Name :. Function)
    , tok ("[_a-z]" ++ identifier ++ "*") Name
    , tok ("[A-Z]" ++ identifier ++ "*") (Keyword :. Type)

    -- Operators. A leading @\@@, @$@ or @~@ is excluded so that the
    -- particle, character and unquote rules further down can match.
    , tok ("(?![@$~])" ++ operator ++ "+") Operator

    -- Numbers (floats before integers so the longer form wins)
    , tok "\\d+[eE][\\+\\-]?\\d+" (Number :. Float)
    , tok "\\d+\\.\\d+([eE][\\+\\-]?\\d+)?" (Number :. Float)
    , tok "0[oO][0-7]+" (Number :. Oct)
    , tok "0[xX][\\da-fA-F]+" (Number :. Hex)
    , tok "\\d+" (Number :. Integer)

    -- Characters & Strings
    , tokNext "\\$" (String :. Char) (GoTo character)
    , tokNext "\"" String (GoTo string)

    -- Quoting
    , tok ("'" ++ identifier ++ "+") (String :. Symbol)
    , tok "'" (String :. Symbol)
    , tok ("`" ++ identifier ++ "+") (String :. Symbol)
    , tok "`" (String :. Symbol)
    , tok ("~" ++ identifier ++ "+") (String :. Interpol)
    , tok "~" (String :. Interpol)

    -- Particles
    , tok ("@" ++ identifier ++ "+") (Name :. Decorator)
    , tok "@" (Name :. Decorator)

    -- Punctuation
    , tok "[][(),;{}|]" Punctuation
    ]

-- | State entered from 'root' after an opening @{-@. Every rule emits a
-- multi-line comment token; a nested @{-@ uses 'Push' and a closing @-}@ uses
-- 'Pop' (presumably so that nested block comments balance).
comment :: TokenMatcher
comment = [commentText, nestedOpen, blockClose, strayDelimiter]
  where
    commentTok     = Comment :. Multiline
    -- Any run of characters that cannot begin or end a delimiter.
    commentText    = tok "[^\\-\\{\\}]+" commentTok
    nestedOpen     = tokNext "{-" commentTok Push
    blockClose     = tokNext "-}" commentTok Pop
    -- A lone dash or brace that is not part of a delimiter.
    strayDelimiter = tok "[-{}]" commentTok

-- | State entered from 'root' after a @$@. It consumes either one
-- non-backslash character or a backslash followed by a run of non-whitespace
-- characters, and pops in both cases.
character :: TokenMatcher
character =
    [ finishWith "[^\\\\]" Char
    , finishWith "\\\\[^\\s]+" Escape
    ]
  where
    -- Emit a string sub-token of the given kind, then pop this state.
    finishWith pat kind = tokNext pat (String :. kind) Pop

-- | State entered from 'root' after an opening double quote. A backslash
-- hands over to the 'escape' state; an unescaped double quote pops.
string :: TokenMatcher
string = [literalText, escapeStart, closingQuote]
  where
    -- A run of characters containing neither a backslash nor a double quote.
    literalText  = tok "[^\\\\\"]+" String
    escapeStart  = tokNext "\\\\" (String :. Escape) (GoTo escape)
    closingQuote = tokNext "\"" String Pop

-- | State entered from 'string' just after a backslash. Each pattern below
-- describes one form of escape body; whichever matches is emitted as a
-- string-escape token and the state is popped.
escape :: TokenMatcher
escape = map escapeThenPop patterns
  where
    escapeThenPop pat = tokNext pat (String :. Escape) Pop
    patterns =
        [ "[abfnrtv\"&\\\\]"          -- single-character escapes
        , "\\^[\\]\\[A-Z@\\^_]"       -- caret notation
        , intercalate "|" ascii       -- control-code mnemonics from 'ascii'
        , "o[0-7]+"                   -- octal digits after @o@
        , "x[\\da-fA-F]+"             -- hexadecimal digits after @x@
        , "\\d+"                      -- decimal digits
        , "\\s+\\\\"                  -- whitespace closed by a second backslash
        ]