module DDC.Core.Parser.Lexer
(
isConName, isConStart, isConBody
, readTwConBuiltin
, readTcConBuiltin
, readWbConBuiltin
, readCon
, isVarName, isVarStart, isVarBody
, readVar
, lexExp)
where
import DDC.Base.Lexer
import DDC.Core.Exp
import DDC.Core.Parser.Tokens
import Data.Char
readWbConBuiltin :: String -> Maybe WbCon
readWbConBuiltin ss
= case ss of
"pure" -> Just WbConPure
"empty" -> Just WbConEmpty
"use" -> Just WbConUse
"read" -> Just WbConRead
"alloc" -> Just WbConAlloc
_ -> Nothing
keywords :: [(String, Tok n)]
keywords
= [ ("in", KA KIn)
, ("of", KA KOf)
, ("letrec", KA KLetRec)
, ("letregion", KA KLetRegion)
, ("withregion", KA KWithRegion)
, ("let", KA KLet)
, ("lazy", KA KLazy)
, ("case", KA KCase)
, ("purify", KA KPurify)
, ("forget", KA KForget)
, ("weakeff", KA KWeakEff)
, ("weakclo", KA KWeakClo)
, ("with", KA KWith)
, ("where", KA KWhere) ]
lexExp :: Int -> String -> [Token (Tok String)]
lexExp lineStart str
= lexWord lineStart 1 str
where
lexWord :: Int -> Int -> String -> [Token (Tok String)]
lexWord line column w
= let tok t = Token t (SourcePos Nothing line column)
tokA = tok . KA
tokN = tok . KN
lexMore n rest
= lexWord line (column + n) rest
in case w of
[] -> []
' ' : w' -> lexMore 1 w'
'\t' : w' -> lexMore 8 w'
'\n' : w' -> lexWord (line + 1) 1 w'
'(' : ')' : w' -> tokN (KCon "()") : lexMore 2 w'
'[' : ':' : w' -> tokA KSquareColonBra : lexMore 2 w'
':' : ']' : w' -> tokA KSquareColonKet : lexMore 2 w'
'<' : ':' : w' -> tokA KAngleColonBra : lexMore 2 w'
':' : '>' : w' -> tokA KAngleColonKet : lexMore 2 w'
'~' : '>' : w' -> tokA KArrowTilde : lexMore 2 w'
'-' : '>' : w' -> tokA KArrowDash : lexMore 2 w'
'=' : '>' : w' -> tokA KArrowEquals : lexMore 2 w'
':' : ':' : w' -> tokA KColonColon : lexMore 2 w'
'/' : '\\' : w' -> tokA KBigLambda : lexMore 2 w'
'^' : cs
| (ds, rest) <- span isDigit cs
, length ds >= 1
-> tokA (KIndex (read ds)) : lexMore (1 + length ds) rest
'(' : w' -> tokA KRoundBra : lexMore 1 w'
')' : w' -> tokA KRoundKet : lexMore 1 w'
'[' : w' -> tokA KSquareBra : lexMore 1 w'
']' : w' -> tokA KSquareKet : lexMore 1 w'
'{' : w' -> tokA KBraceBra : lexMore 1 w'
'}' : w' -> tokA KBraceKet : lexMore 1 w'
'<' : w' -> tokA KAngleBra : lexMore 1 w'
'>' : w' -> tokA KAngleKet : lexMore 1 w'
'.' : w' -> tokA KDot : lexMore 1 w'
'|' : w' -> tokA KBar : lexMore 1 w'
'^' : w' -> tokA KHat : lexMore 1 w'
'+' : w' -> tokA KPlus : lexMore 1 w'
':' : w' -> tokA KColon : lexMore 1 w'
',' : w' -> tokA KComma : lexMore 1 w'
'\\' : w' -> tokA KBackSlash : lexMore 1 w'
';' : w' -> tokA KSemiColon : lexMore 1 w'
'_' : w' -> tokA KUnderscore : lexMore 1 w'
'=' : w' -> tokA KEquals : lexMore 1 w'
'&' : w' -> tokA KAmpersand : lexMore 1 w'
'-' : w' -> tokA KDash : lexMore 1 w'
'!' : '0' : w' -> tokA KBotEffect : lexMore 2 w'
'$' : '0' : w' -> tokA KBotClosure : lexMore 2 w'
'*' : '*' : w' -> tokA KSortComp : lexMore 2 w'
'@' : '@' : w' -> tokA KSortProp : lexMore 2 w'
'*' : w' -> tokA KKindValue : lexMore 1 w'
'%' : w' -> tokA KKindRegion : lexMore 1 w'
'!' : w' -> tokA KKindEffect : lexMore 1 w'
'$' : w' -> tokA KKindClosure : lexMore 1 w'
'@' : w' -> tokA KKindWitness : lexMore 1 w'
c : cs
| isDigit c
, (body, rest) <- span isDigit cs
-> tokN (KLit (c:body)) : lexMore (length (c:body)) rest
c : cs
| isConStart c
, (body, rest) <- span isConBody cs
, (body', rest') <- case rest of
'#' : rest' -> (body ++ "#", rest')
_ -> (body, rest)
-> let readNamedCon s
| Just twcon <- readTwConBuiltin s
= tokA (KTwConBuiltin twcon) : lexMore (length s) rest'
| Just tccon <- readTcConBuiltin s
= tokA (KTcConBuiltin tccon) : lexMore (length s) rest'
| Just con <- readCon s
= tokN (KCon con) : lexMore (length s) rest'
| otherwise
= [tok (KJunk c)]
in readNamedCon (c : body')
c : cs
| isVarStart c
, (body, rest) <- span isVarBody cs
-> let readNamedVar s
| Just t <- lookup s keywords
= tok t : lexMore (length s) rest
| Just wc <- readWbConBuiltin s
= tokA (KWbConBuiltin wc) : lexMore (length s) rest
| Just v <- readVar s
= tokN (KVar v) : lexMore (length s) rest
| otherwise
= [tok (KJunk c)]
in readNamedVar (c : body)
c : _ -> [tok $ KJunk c]
isConName :: String -> Bool
isConName str
= case str of
[] -> False
(c:cs)
| isConStart c
, and (map isConBody cs)
-> True
| _ : _ <- cs
, isConStart c
, and (map isConBody (init cs))
, last cs == '#'
-> True
| otherwise
-> False
isConStart :: Char -> Bool
isConStart = isUpper
isConBody :: Char -> Bool
isConBody c = isUpper c || isLower c || isDigit c || c == '_'
readTwConBuiltin :: String -> Maybe TwCon
readTwConBuiltin ss
= case ss of
"Global" -> Just TwConGlobal
"DeepGlobal" -> Just TwConDeepGlobal
"Const" -> Just TwConConst
"DeepConst" -> Just TwConDeepConst
"Mutable" -> Just TwConMutable
"DeepMutable" -> Just TwConDeepMutable
"Lazy" -> Just TwConLazy
"HeadLazy" -> Just TwConHeadLazy
"Manifest" -> Just TwConManifest
"Pure" -> Just TwConPure
"Empty" -> Just TwConEmpty
_ -> Nothing
readTcConBuiltin :: String -> Maybe TcCon
readTcConBuiltin ss
= case ss of
"Read" -> Just TcConRead
"HeadRead" -> Just TcConHeadRead
"DeepRead" -> Just TcConDeepRead
"Write" -> Just TcConWrite
"DeepWrite" -> Just TcConDeepWrite
"Alloc" -> Just TcConAlloc
"DeepAlloc" -> Just TcConDeepAlloc
"Use" -> Just TcConUse
"DeepUse" -> Just TcConDeepUse
_ -> Nothing
readCon :: String -> Maybe String
readCon ss
| isConName ss = Just ss
| otherwise = Nothing
isVarName :: String -> Bool
isVarName [] = False
isVarName (c:cs) = isVarStart c && (and $ map isVarBody cs)
isVarStart :: Char -> Bool
isVarStart = isLower
isVarBody :: Char -> Bool
isVarBody c
= isUpper c || isLower c || isDigit c || c == '_' || c == '\''
readVar :: String -> Maybe String
readVar ss
| isVarName ss = Just ss
| otherwise = Nothing