module Language.Haskell.HsColour.Classify
( TokenType(..)
, tokenise
) where
import Char
import List
tokenise :: String -> [(TokenType,String)]
tokenise str =
let chunks = glue . chunk $ str
newline = True : (map ("\n" `isPrefixOf`) chunks)
in map (\(s,n)-> (classify s n,s)) (zip chunks newline)
chunk :: String -> [String]
chunk [] = []
chunk ('\r':s) = chunk s
chunk ('\n':s) = "\n": chunk s
chunk (c:s) | isLinearSpace c
= (c:ss): chunk rest where (ss,rest) = span isLinearSpace s
chunk ('{':'-':s) = let (com,s') = nestcomment 0 s
in ('{':'-':com) : chunk s'
chunk s = case Prelude.lex s of
[] -> [head s]: chunk (tail s)
((tok@('-':'-':_),rest):_)
| all (=='-') tok -> (tok++com): chunk s'
where (com,s') = eolcomment rest
((tok,rest):_) -> tok: chunk rest
isLinearSpace c = c `elem` " \t\f"
glue ("`":rest) =
case glue rest of
(qn:"`":rest) -> ("`"++qn++"`"): glue rest
_ -> "`": glue rest
glue (s:ss) | all (=='-') s && length s >=2
= (s++concat c): glue rest
where (c,rest) = break ('\n'`elem`) ss
glue ("(":ss) = case rest of
")":rest -> ("(" ++ concat tuple ++ ")") : glue rest
_ -> "(" : glue ss
where (tuple,rest) = span (==",") ss
glue ("[":"]":ss) = "[]" : glue ss
glue ("\n":"#":ss)= "\n" : ('#':concat line) : glue rest
where (line,rest) = break ('\n'`elem`) ss
glue (s:ss) = s: glue ss
glue [] = []
nestcomment :: Int -> String -> (String,String)
nestcomment n ('{':'-':ss) | n>=0 = (("{-"++cs),rm)
where (cs,rm) = nestcomment (n+1) ss
nestcomment n ('-':'}':ss) | n>0 = (("-}"++cs),rm)
where (cs,rm) = nestcomment (n1) ss
nestcomment n ('-':'}':ss) | n==0 = ("-}",ss)
nestcomment n (s:ss) | n>=0 = ((s:cs),rm)
where (cs,rm) = nestcomment n ss
nestcomment n [] = ([],[])
eolcomment :: String -> (String,String)
eolcomment s@('\n':_) = ([], s)
eolcomment ('\r':s) = eolcomment s
eolcomment (c:s) = (c:cs, s') where (cs,s') = eolcomment s
eolcomment [] = ([],[])
data TokenType =
Space | Keyword | Keyglyph | Layout | Comment | Conid | Varid |
Conop | Varop | String | Char | Number | Cpp | Error |
Definition
deriving (Eq,Show)
classify :: String -> Bool -> TokenType
classify s@(h:t) newline
| isSpace h = Space
| all (=='-') s = Comment
| "--" `isPrefixOf` s
&& any isSpace s = Comment
| "{-" `isPrefixOf` s = Comment
| s `elem` keywords = Keyword
| s `elem` keyglyphs = Keyglyph
| s `elem` layoutchars = Layout
| isUpper h = Conid
| s == "[]" = Conid
| h == '(' && isTupleTail t = Conid
| h == '#' = Cpp
| isLower h = if newline then Definition else Varid
| h `elem` symbols = Varop
| h==':' = Conop
| h=='`' = Varop
| h=='"' = String
| h=='\'' = Char
| isDigit h = Number
| otherwise = Error
classify _ _ = Space
isTupleTail [')'] = True
isTupleTail (',':xs) = isTupleTail xs
isTupleTail _ = False
keywords =
["case","class","data","default","deriving","do","else","forall"
,"if","import","in","infix","infixl","infixr","instance","let","module"
,"newtype","of","qualified","then","type","where","_"
,"foreign","ccall","as","safe","unsafe"]
keyglyphs =
["..","::","=","\\","|","<-","->","@","~","=>","[","]"]
layoutchars =
map (:[]) ";{}(),"
symbols =
"!#$%&*+./<=>?@\\^|-~"