> module Epic.Lexer where
> import Data.Char
> import Epic.Language
> type LineNumber = Int
> type P a = String -> String -> LineNumber -> Result a
> getLineNo :: P LineNumber
> getLineNo = \s fn l -> Success l
> getFileName :: P String
> getFileName = \s fn l -> Success fn
> getContent :: P String
> getContent = \s fn l -> Success s
> thenP :: P a -> (a -> P b) -> P b
> m `thenP` k = \s fn l ->
>    case m s fn l of
>        Success a -> k a s fn l
>        Failure e f ln -> Failure e f ln
> returnP :: a -> P a
> returnP a = \s fn l -> Success a
> 
> failP :: String -> P a
> failP err = \s fn l -> Failure err fn l
> catchP :: P a -> (String -> P a) -> P a
> catchP m k = \s fn l ->
>    case m s fn l of
>       Success a -> Success a
>       Failure e f ln -> k e s fn l
> happyError :: P a
> happyError = reportError "Parse error"
> reportError :: String -> P a
> reportError err = getFileName `thenP` \fn ->
>                   getLineNo `thenP` \line ->
>                   getContent `thenP` \content ->
>                       failP (fn ++ ":" ++ show line ++ ":" ++ err ++ " at " ++ take 40 content ++ " ...")
> data Token 
>       = TokenName Name
>       | TokenString String
>       | TokenInt Int
>       | TokenFloat Double
>       | TokenBigInt Integer
>       | TokenBigFloat Double
>       | TokenChar Char
>       | TokenBool Bool
>       | TokenIntType
>       | TokenBigIntType
>       | TokenCharType
>       | TokenBoolType
>       | TokenFloatType
>       | TokenBigFloatType
>       | TokenStringType
>       | TokenPtrType
>       | TokenUnitType
>       | TokenAnyType
>       | TokenDataType
>       | TokenTyCType
>       | TokenTyLinear
>       | TokenTyEval
>       | TokenFunType
>       | TokenForeign
>       | TokenCInclude
>       | TokenLink
>       | TokenInline
>       | TokenOB
>       | TokenCB
>       | TokenOCB
>       | TokenCCB
>       | TokenOSB
>       | TokenCSB
>       | TokenPlus
>       | TokenMinus
>       | TokenTimes
>       | TokenDivide
>       | TokenMod
>       | TokenFPlus
>       | TokenFMinus
>       | TokenFTimes
>       | TokenFDivide
>       | TokenEquals
>       | TokenEQ
>       | TokenGE
>       | TokenLE
>       | TokenGT
>       | TokenLT
>       | TokenFEQ
>       | TokenFGE
>       | TokenFLE
>       | TokenFGT
>       | TokenFLT
>       | TokenShL
>       | TokenShR
>       | TokenArrow
>       | TokenColon
>       | TokenUnit
>       | TokenCon
>       | TokenDefault
>       | TokenExport
>       | TokenCType
>       | TokenLet
>       | TokenCase
>       | TokenOf
>       | TokenIf
>       | TokenThen
>       | TokenElse
>       | TokenWhile
>       | TokenUnused
>       | TokenIn
>       | TokenLazy
>       | TokenPar
>       | TokenStrict
>       | TokenEffect
>       | TokenError
>       | TokenImpossible
>       | TokenProj
>       | TokenSemi
>       | TokenComma
>       | TokenBar
>       | TokenLam
>       | TokenDot
>       | TokenExtern
>       | TokenMemory
>       | TokenFixed
>       | TokenGrowable
>       | TokenInclude
>       | TokenEOF
>  deriving (Show, Eq)
> 
> 
> lexer :: (Token -> P a) -> P a
> lexer cont [] = cont TokenEOF []
> lexer cont ('\n':cs) = \fn line -> lexer cont cs fn (line+1)
> lexer cont (c:cs)
>       | isSpace c = \fn line -> lexer cont cs fn line
>       | isAlpha c = lexVar cont (c:cs)
>       | isDigit c = lexNum cont (c:cs)
>       | c == '_' = lexVar cont (c:cs)
> lexer cont ('"':cs) = lexString cont cs
> lexer cont ('\'':cs) = lexChar cont cs
> lexer cont ('{':'-':cs) = lexerEatComment 0 cont cs
> lexer cont ('-':'-':cs) = lexerEatToNewline cont cs
> lexer cont ('-':'>':cs) = cont TokenArrow cs
> lexer cont ('(':cs) = cont TokenOB cs
> lexer cont (')':cs) = cont TokenCB cs
> lexer cont ('{':cs) = cont TokenOCB cs
> lexer cont ('}':cs) = cont TokenCCB cs
> lexer cont ('[':cs) = cont TokenOSB cs
> lexer cont (']':cs) = cont TokenCSB cs
> lexer cont ('+':'.':cs) = cont TokenFPlus cs
> lexer cont ('-':'.':cs) = cont TokenFMinus cs
> lexer cont ('*':'.':cs) = cont TokenFTimes cs
> lexer cont ('/':'.':cs) = cont TokenFDivide cs
> lexer cont ('+':cs) = cont TokenPlus cs
> lexer cont ('-':cs) = cont TokenMinus cs
> lexer cont ('*':cs) = cont TokenTimes cs
> lexer cont ('/':cs) = cont TokenDivide cs
> lexer cont ('=':'=':'.':cs) = cont TokenFEQ cs
> lexer cont ('>':'=':'.':cs) = cont TokenFGE cs
> lexer cont ('<':'=':'.':cs) = cont TokenFLE cs
> lexer cont ('>':'.':cs) = cont TokenFGT cs
> lexer cont ('<':'.':cs) = cont TokenFLT cs
> lexer cont ('=':'=':cs) = cont TokenEQ cs
> lexer cont ('>':'=':cs) = cont TokenGE cs
> lexer cont ('<':'=':cs) = cont TokenLE cs
> lexer cont ('<':'<':cs) = cont TokenShL cs
> lexer cont ('>':'>':cs) = cont TokenShR cs
> lexer cont ('>':cs) = cont TokenGT cs
> lexer cont ('<':cs) = cont TokenLT cs
> lexer cont ('=':cs) = cont TokenEquals cs
> lexer cont (':':cs) = cont TokenColon cs
> lexer cont ('!':cs) = cont TokenProj cs
> lexer cont (';':cs) = cont TokenSemi cs
> lexer cont (',':cs) = cont TokenComma cs
> lexer cont ('|':cs) = cont TokenBar cs
> lexer cont ('.':cs) = cont TokenDot cs
> lexer cont ('\\':cs) = cont TokenLam cs
> lexer cont ('%':c:cs) | isAlpha c = lexSpecial cont (c:cs)
> lexer cont ('%':cs) = cont TokenMod cs
> lexer cont (c:cs) = lexError c cs
> lexError c s f l = failP (show l ++ ": Unrecognised token '" ++ [c] ++ "'\n") s f l
> lexerEatComment nls cont ('-':'}':cs)
>     = \fn line -> lexer cont cs fn (line+nls)
> lexerEatComment nls cont ('\n':cs) = lexerEatComment (nls+1) cont cs
> lexerEatComment nls cont (c:cs) = lexerEatComment nls cont cs
> 
> lexerEatToNewline cont ('\n':cs)
>    = \fn line -> lexer cont cs fn (line+1)
> lexerEatToNewline cont (c:cs) = lexerEatToNewline cont cs
> lexNum cont cs = case readNum cs of
>                     (num,'L':rest,isreal) ->
>                         cont (tok True num isreal) rest
>                     (num,rest,isreal) ->
>                         cont (tok False num isreal) rest
>   where tok False num isreal | isreal = TokenFloat (read num)
>                              | otherwise = TokenInt (read num)
>         tok True num isreal | isreal = TokenBigFloat (read num)
>                             | otherwise = TokenBigInt (read num)
> readNum :: String -> (String,String,Bool)
> readNum x = rn' False "" x
>   where rn' dot acc [] = (acc,[],dot)
>         rn' False acc ('.':xs) | head xs /= '.' = rn' True (acc++".") xs
>         rn' dot acc (x:xs) | isDigit x = rn' dot (acc++[x]) xs
>         rn' dot acc ('e':'+':xs) = rn' True (acc++"e+") xs
>         rn' dot acc ('e':'-':xs) = rn' True (acc++"e-") xs
>         rn' dot acc ('e':xs) = rn' True (acc++"e") xs
>         rn' dot acc xs = (acc,xs,dot)
> lexString cont cs =
>    \fn line ->
>    case getstr cs of
>       Just (str,rest,nls) -> cont (TokenString str) rest fn (nls+line)
>       Nothing -> failP (fn++":"++show line++":Unterminated string contant")
>                     cs fn line
> lexChar cont cs =
>    \fn line ->
>    case getchar cs of
>       Just (str,rest) -> cont (TokenChar str) rest fn line
>       Nothing -> 
>           failP (fn++":"++show line++":Unterminated character constant")
>                        cs fn line
> isAllowed c = isAlpha c || isDigit c || c `elem` "_\'?#"
> lexVar cont cs =
>    case span isAllowed cs of
> -- Keywords
>       ("Default",rest) -> cont TokenDefault rest
> -- Types
>       ("Int",rest) -> cont TokenIntType rest
>       ("Char",rest) -> cont TokenCharType rest
>       ("Bool",rest) -> cont TokenBoolType rest
>       ("Float",rest) -> cont TokenFloatType rest
>       ("BigInt",rest) -> cont TokenBigIntType rest
>       ("BigFloat",rest) -> cont TokenBigFloatType rest
>       ("String",rest) -> cont TokenStringType rest
>       ("Ptr",rest) -> cont TokenPtrType rest
>       ("Unit",rest) -> cont TokenUnitType rest
>       ("Data",rest) -> cont TokenDataType rest
>       ("CType",rest) -> cont TokenTyCType rest
>       ("Linear",rest) -> cont TokenTyLinear rest
>       ("Eval",rest) -> cont TokenTyEval rest
>       ("Fun",rest) -> cont TokenFunType rest
>       ("Any",rest) -> cont TokenAnyType rest
> -- values
>       ("unit",rest) -> cont TokenUnit rest
>       ("Con",rest) -> cont TokenCon rest
>       ("true",rest) -> cont (TokenBool True) rest
>       ("false",rest) -> cont (TokenBool False) rest
> -- expressions
>       ("let",rest) -> cont TokenLet rest
>       ("case",rest) -> cont TokenCase rest
>       ("of",rest) -> cont TokenOf rest
>       ("if",rest) -> cont TokenIf rest
>       ("then",rest) -> cont TokenThen rest
>       ("else",rest) -> cont TokenElse rest
>       ("in",rest) -> cont TokenIn rest
>       ("lazy",rest) -> cont TokenLazy rest
>       ("par",rest) -> cont TokenPar rest
>       ("error",rest) -> cont TokenError rest
>       ("impossible",rest) -> cont TokenImpossible rest
>       ("foreign",rest) -> cont TokenForeign rest
> -- declarations
>       ("export",rest) -> cont TokenExport rest
>       ("ctype",rest) -> cont TokenCType rest
>       ("extern",rest) -> cont TokenExtern rest
>       ("include",rest) -> cont TokenInclude rest
>       (var,rest)   -> cont (mkname var) rest
> lexSpecial cont cs =
>     case span isAllowed cs of
>       ("include",rest) -> cont TokenCInclude rest
>       ("link",rest) -> cont TokenLink rest
>       ("inline",rest) -> cont TokenInline rest
>       ("effect",rest) -> cont TokenEffect rest
>       ("strict",rest) -> cont TokenStrict rest
>       ("while",rest) -> cont TokenWhile rest
>       ("memory",rest) -> cont TokenMemory rest
>       ("fixed",rest) -> cont TokenFixed rest
>       ("growable",rest) -> cont TokenGrowable rest
>       ("unused", rest) -> cont TokenUnused rest
>       (thing,rest) -> cont TokenMod (thing++rest)
> mkname :: String -> Token
> mkname c = TokenName (UN c)
> getstr :: String -> Maybe (String,String,Int)
> getstr cs = case getstr' "" cs 0 of
>                Just (str,rest,nls) -> Just (reverse str,rest,nls)
>                _ -> Nothing
> getstr' acc ('\"':xs) = \nl -> Just (acc,xs,nl)
> getstr' acc ('\\':'n':xs) = getstr' ('\n':acc) xs -- Newline
> getstr' acc ('\\':'r':xs) = getstr' ('\r':acc) xs -- CR
> getstr' acc ('\\':'t':xs) = getstr' ('\t':acc) xs -- Tab
> getstr' acc ('\\':'b':xs) = getstr' ('\b':acc) xs -- Backspace
> getstr' acc ('\\':'a':xs) = getstr' ('\a':acc) xs -- Alert
> getstr' acc ('\\':'f':xs) = getstr' ('\f':acc) xs -- Formfeed
> getstr' acc ('\\':'0':xs) = getstr' ('\0':acc) xs -- null
> getstr' acc ('\\':x:xs) = getstr' (x:acc) xs -- Literal
> getstr' acc ('\n':xs) = \nl ->getstr' ('\n':acc) xs (nl+1) -- Count the newline
> getstr' acc (x:xs) = getstr' (x:acc) xs
> getstr' _ _ = \nl -> Nothing
> getchar :: String -> Maybe (Char,String)
> getchar ('\\':'n':'\'':xs) = Just ('\n',xs) -- Newline
> getchar ('\\':'r':'\'':xs) = Just ('\r',xs) -- CR
> getchar ('\\':'t':'\'':xs) = Just ('\t',xs) -- Tab
> getchar ('\\':'b':'\'':xs) = Just ('\b',xs) -- Backspace
> getchar ('\\':'a':'\'':xs) = Just ('\a',xs) -- Alert
> getchar ('\\':'f':'\'':xs) = Just ('\f',xs) -- Formfeed
> getchar ('\\':'0':'\'':xs) = Just ('\0',xs) -- null
> getchar ('\\':x:'\'':xs) = Just (x,xs) -- Literal
> getchar (x:'\'':xs) = Just (x,xs)
> getchar _ = Nothing