module DDC.Core.Lexer
( module DDC.Core.Lexer.Tokens
, module DDC.Core.Lexer.Names
, lexModuleWithOffside
, lexExp)
where
import DDC.Core.Lexer.Offside
import DDC.Core.Lexer.Comments
import DDC.Core.Lexer.Names
import DDC.Core.Lexer.Tokens
import DDC.Data.SourcePos
import DDC.Data.Token
import Data.Char
import Data.Text (Text)
import qualified Data.Text as T
import Data.Monoid
lexModuleWithOffside
:: FilePath
-> Int
-> String
-> [Token (Tok String)]
lexModuleWithOffside sourceName lineStart str
=
applyOffside [] []
$ addStarts
$ dropComments
$ lexText sourceName lineStart
$ T.pack str
lexExp :: FilePath
-> Int
-> String
-> [Token (Tok String)]
lexExp sourceName lineStart str
=
dropNewLines
$ dropComments
$ lexText sourceName lineStart
$ T.pack str
lexText :: String
-> Int
-> Text
-> [Token (Tok String)]
lexText sourceName lineStart xx
= lexWord lineStart 1 xx
where
lexWord :: Int -> Int -> Text -> [Token (Tok String)]
lexWord line column w
= match w
where
tok t = Token t (SourcePos sourceName line column)
tokM = tok . KM
tokA = tok . KA
tokN = tok . KN
lexMore n rest
= lexWord line (column + n) rest
lexUpto pat rest
= case dropWhile (not . T.isPrefixOf pat) (T.tails rest) of
x : _ -> x
_ -> T.empty
txt = T.pack
prefix str = T.stripPrefix (T.pack str)
match cs
| T.null cs
= []
| Just (' ', rest) <- T.uncons cs
= lexMore 1 rest
| Just ('\t', rest) <- T.uncons cs
= lexMore 8 rest
| Just rest <- T.stripPrefix (txt "{-#") cs
, (prag, rest') <- T.breakOn (txt "#-}") rest
, rest'' <- T.drop 3 rest'
, len <- 3 + T.length prag + 3
= tokA (KPragma prag) : lexMore len rest''
| Just rest <- T.stripPrefix (txt "{-") cs
= tokM KCommentBlockStart : lexMore 2 (lexUpto (txt "-}") rest)
| Just rest <- T.stripPrefix (txt "-}") cs
= tokM KCommentBlockEnd : lexMore 2 rest
| Just cs1 <- T.stripPrefix (txt "--") cs
, (_junk, rest) <- T.span (/= '\n') cs1
= tokM KCommentLineStart : lexMore 2 rest
| Just ('\n', rest) <- T.uncons cs
= tokM KNewLine : lexWord (line + 1) 1 rest
| not (T.compareLength cs 2 == LT)
, (cs1, rest) <- T.splitAt 2 cs
, Just t
<- case T.unpack cs1 of
"[:" -> Just KSquareColonBra
":]" -> Just KSquareColonKet
"{:" -> Just KBraceColonBra
":}" -> Just KBraceColonKet
"~>" -> Just KArrowTilde
"->" -> Just KArrowDash
"<-" -> Just KArrowDashLeft
"=>" -> Just KArrowEquals
"/\\" -> Just KBigLambdaSlash
"()" -> Just KDaConUnit
_ -> Nothing
= tokA t : lexMore 2 rest
| Just ('(', cs1) <- T.uncons cs
, Just (c, cs2) <- T.uncons cs1
, isOpStart c
, (body, cs3) <- T.span isOpBody cs2
, Just (')', rest) <- T.uncons cs3
= tokA (KOpVar (T.unpack (T.cons c body)))
: lexMore (2 + T.length (T.cons c body)) rest
| Just (c, cs1) <- T.uncons cs
, isDigit c
, (body, rest) <- T.span isLitBody cs1
= let str = T.unpack (T.cons c body)
in tokN (KLit str) : lexMore (length str) rest
| Just ('-', cs1) <- T.uncons cs
, Just (c, _) <- T.uncons cs1
, isDigit c
= let (body, rest) = T.span isLitBody cs1
str = T.unpack (T.cons '-' body)
in tokN (KLit str) : lexMore (length str) rest
| Just ('\"', cc) <- T.uncons cs
= let
eat n acc xs
| Just ('\\', xs1) <- T.uncons xs
, Just ('"', xs2) <- T.uncons xs1
= eat (n + 2) ('"' : acc) xs2
| Just ('\\', xs1) <- T.uncons xs
, Just ('n', xs2) <- T.uncons xs1
= eat (n + 2) ('\n' : acc) xs2
| Just ('"', xs1) <- T.uncons xs
= tokA (KString (T.pack (reverse acc)))
: lexWord line (column + n) xs1
| Just (c, xs1) <- T.uncons xs
= eat (n + 1) (c : acc) xs1
| otherwise
= [tok $ KErrorUnterm (T.unpack cs)]
in eat 0 [] cc
| Just (c, cs1) <- T.uncons cs
, isOpStart c
, (body, rest) <- T.span isOpBody cs1
, sym <- T.cons c body
, sym /= T.pack "="
, sym /= T.pack "|"
= tokA (KOp (T.unpack sym)) : lexMore (1 + T.length body) rest
| Just (c, rest) <- T.uncons cs
, Just t
<- case c of
'(' -> Just KRoundBra
')' -> Just KRoundKet
'[' -> Just KSquareBra
']' -> Just KSquareKet
'{' -> Just KBraceBra
'}' -> Just KBraceKet
'.' -> Just KDot
',' -> Just KComma
';' -> Just KSemiColon
'\\' -> Just KBackSlash
'=' -> Just KEquals
'|' -> Just KBar
_ -> Nothing
= tokA t : lexMore 1 rest
| Just ('^', cs1) <- T.uncons cs
, (ds, rest) <- T.span isDigit cs1
, T.length ds >= 1
= tokA (KIndex (read (T.unpack ds))) : lexMore (1 + T.length ds) rest
| Just ('^', rest) <- T.uncons cs
= tokA KHat : lexMore 1 rest
| Just ('λ', rest) <- T.uncons cs
= tokA KLambda : lexMore 1 rest
| Just ('Λ', rest) <- T.uncons cs
= tokA KBigLambda : lexMore 1 rest
| Just rest <- prefix "Pure" cs
= tokA KBotEffect : lexMore 4 rest
| Just rest <- prefix "Empty" cs
= tokA KBotClosure : lexMore 5 rest
| Just (c, cs1) <- T.uncons cs
, isConStart c
, (body, rest) <- T.span isConBody cs1
, (body', rest') <- case T.uncons rest of
Just ('\'', rest') -> (body <> T.pack "'", rest')
Just ('#', rest') -> (body <> T.pack "#", rest')
_ -> (body, rest)
= let readNamedCon s
| Just socon <- readSoConBuiltin s
= tokA (KSoConBuiltin socon) : lexMore (length s) rest'
| Just kicon <- readKiConBuiltin s
= tokA (KKiConBuiltin kicon) : lexMore (length s) rest'
| Just twcon <- readTwConBuiltin s
= tokA (KTwConBuiltin twcon) : lexMore (length s) rest'
| Just tccon <- readTcConBuiltin s
= tokA (KTcConBuiltin tccon) : lexMore (length s) rest'
| Just con <- readCon s
= tokN (KCon con) : lexMore (length s) rest'
| otherwise
= [tok (KErrorJunk [c])]
in readNamedCon (T.unpack (T.cons c body'))
| Just (c, cs1) <- T.uncons cs
, isVarStart c
, (body, rest) <- T.span isVarBody cs1
, (body', rest') <- case T.uncons rest of
Just ('#', rest') -> (body <> T.pack "#", rest')
_ -> (body, rest)
= let readNamedVar s
| "_" <- s
= tokA KUnderscore : lexMore (length s) rest'
| Just t <- lookup s keywords
= tok t : lexMore (length s) rest'
| Just v <- readVar s
= tokN (KVar v) : lexMore (length s) rest'
| otherwise
= [tok (KErrorJunk [c])]
in readNamedVar (T.unpack (T.cons c body'))
| otherwise
= case T.unpack cs of
(c : _) -> [tok $ KErrorJunk [c]]
_ -> [tok $ KErrorJunk []]