module Language.Core.Parser
( parseModule
, Pos(..)
, Token(..)
, lexer
) where
import Data.Char
import Data.Ratio
import Text.ParserCombinators.Parsec
import Control.Monad
import Debug.Trace
import Language.Core.Syntax
import Language.Core.Pretty
import qualified Data.ByteString.Char8 as B
import qualified Data.ByteString.Lazy.Char8 as L
type LParser = GenParser (Pos Token) ()
data Pos a = Pos !Int !Int a deriving Show
getToken (Pos _ _ token) = token
mkPos line col token = Pos (fromIntegral line) (fromIntegral col) token
data Token
= Keyword String
| Colon
| DColon
| SColon
| Arrow
| LParen
| RParen
| LBrace
| RBrace
| Dot
| Equal
| At
| Star
| Hash
| Dash
| Percent
| QuestionMark
| Backslash
| Uname L.ByteString
| Lname L.ByteString
| String String
| Char Char
| Number Integer
deriving (Show, Eq)
lexer :: L.ByteString -> [Pos Token]
lexer = worker 1 0
where worker line col inp
= case L.uncons inp of
Nothing -> []
Just ('\n',cs) -> worker (line+1) 0 cs
Just ('"',cs) -> let (str,len,rest) = lex_string cs
in mkPos line col (String str) : worker line (col+len+2) rest
Just ('\'',cs) -> let (char, len, rest) = lex_char cs
in mkPos line col (Char char) : worker line (col+len+2) (L.drop 1 rest)
Just (c,cs)
| isSpace c -> worker line (col+1) cs
| Just (token,len,rest) <- findSymbol inp
-> mkPos line col token : worker line (col+len) rest
| isLetter c -> let (name, rest) = L.span isNameChar inp
token = if isUpper c then Uname name
else Lname name
in mkPos line col token : worker line (col+L.length name) rest
| isDigit c -> let (digits, rest) = L.span isDigit inp
token = Number (read (L.unpack digits))
in mkPos line col token : worker line (col+L.length digits) rest
| otherwise -> worker line (col+1) cs
lex_string = worker 0 ""
where worker len acc inp
= case L.uncons inp of
Nothing -> error "lexer failure in string: eof"
Just ('"',cs) -> (reverse acc, len, cs)
_ -> let (c, l, cs) = lex_char inp
in worker (len+l) (c:acc) cs
lex_char inp = case L.uncons inp of
Just ('\\',cs) -> case L.unpack (L.take 3 cs) of
['x',ah,al] | all isHexDigit [ah,al]
-> (chr (digitToInt ah*16 + digitToInt al), 4, L.drop 3 cs)
_ -> error "lexer failure in string: bad hex"
Just (c,cs) | c >= '\x20' && c <= '\x7E' && c `notElem` ['\x22', '\x27', '\x5C']
-> (c, 1, cs)
Just (c,cs) -> error $ "lexer failure in string: invalid char: " ++ show c
isNameChar c = isUpper c || isLower c || isDigit c || c == '\''
findSymbol inp = worker symbols
where worker [] = Nothing
worker ((str,token):xs) = if str `L.isPrefixOf` inp
then Just (token, L.length str, L.drop (L.length str) inp)
else worker xs
symbols = [ (L.pack "::", DColon)
, (L.pack ":", Colon)
, (L.pack ";", SColon)
, (L.pack "(", LParen)
, (L.pack ")", RParen)
, (L.pack "{", LBrace)
, (L.pack "}", RBrace)
, (L.pack "->", Arrow)
, (L.pack ".", Dot)
, (L.pack "=", Equal)
, (L.pack "@", At)
, (L.pack "*", Star)
, (L.pack "?", QuestionMark)
, (L.pack "#", Hash)
, (L.pack "-", Dash)
, (L.pack "\\", Backslash)
, (L.pack "%module", Keyword "module")
, (L.pack "%data", Keyword "data")
, (L.pack "%newtype", Keyword "newtype")
, (L.pack "%rec", Keyword "rec")
, (L.pack "%case", Keyword "case")
, (L.pack "%forall", Keyword "forall")
, (L.pack "%let", Keyword "let")
, (L.pack "%note", Keyword "note")
, (L.pack "%external", Keyword "external")
, (L.pack "%dynexternal", Keyword "dynexternal")
, (L.pack "%label", Keyword "label")
, (L.pack "%_", Keyword "_")
, (L.pack "%cast", Keyword "cast")
, (L.pack "%left", Keyword "left")
, (L.pack "%right", Keyword "right")
, (L.pack "%sym", Keyword "sym")
, (L.pack "%unsafe", Keyword "unsafe")
, (L.pack "%inst", Keyword "inst")
, (L.pack "%trans", Keyword "trans")
, (L.pack "%unsafe", Keyword "unsafe")
, (L.pack "%of", Keyword "of")
, (L.pack "%in", Keyword "in")
, (L.pack "%", Percent)
]
parseModule :: SourceName -> L.ByteString -> Either ParseError Module
parseModule src inp = runParser moduleP () src (lexer inp)
moduleP :: LParser Module
moduleP = do keyword "module"
pkg <- pkgname
matchToken Colon
modName <- mident
tdefs <- tdef `endBy` (matchToken SColon)
vdefgs <- vdefg `endBy` (matchToken SColon)
return $ Module (pkg,modName) tdefs vdefgs
vdefg = choice
[ do keyword "rec"
braces $ liftM Rec (vdef `sepBy1` (matchToken SColon))
, do v <- vdef
return $ Nonrec v
] <?> "vdefg"
vdef = do name <- try qvar <|> do name <- lname; return (L.empty,L.empty,name)
matchToken DColon
t <- ty
matchToken Equal
e <- expP
return $ Vdef { vdefLocal = False
, vdefName = name
, vdefType = t
, vdefExp = e }
<?> "vdef"
expP = choice
[ do fn <- aexp
args <- many arg
let app e (Left t) = Appt e t
app e (Right a) = App e a
return $ foldl app fn args
, do matchToken Backslash
binds <- many1 binder
matchToken Arrow
e <- expP
return $ foldr Lam e binds
, do keyword "case"
t <- aty
e <- expP
keyword "of"
b <- vbind
alts <- braces $ alt `sepBy1` (matchToken SColon)
return $ Case e b t alts
, do keyword "cast"
e <- aexp
t <- aty
return $ Cast e t
, do keyword "let"
def <- vdefg
keyword "in"
e <- expP
return $ Let def e
, do keyword "note"
note <- stringP
e <- expP
return $ Note note e
, do keyword "external"
conv <- lname
target <- stringP
t <- aty
return $ External target (L.unpack conv) t
, do keyword "dynexternal"
conv <- lname
t <- aty
return $ DynExternal (L.unpack conv) t
, do keyword "label"
str <- stringP
return $ Label str
]
alt = choice [ do con <- qdcon
tbinds <- many (matchToken At >> tbind)
vbinds <- many vbind
matchToken Arrow
e <- expP
return $ Acon con tbinds vbinds e
, do keyword "_"
matchToken Arrow
e <- expP
return $ Adefault e
, do l <- lit
matchToken Arrow
e <- expP
return $ Alit l e
] <?> "alt"
binder = choice
[ do matchToken At
liftM Tb tbind
, liftM Vb vbind
] <?> "binder"
arg = choice
[ do matchToken At
liftM Left aty
, liftM Right aexp
] <?> "arg"
aexp = choice
[ try $ liftM Dcon qdcon
, try $ liftM Var qvar
, try $ liftM Var (lname >>= \name -> return (L.empty,L.empty,name))
, try $ liftM Lit lit
, parens expP
]
lit = choice
[ try $ parens $
do cs <- stringP
matchToken DColon
t <- ty
return (Lstring cs t)
, try $ parens $
do c <- charP
matchToken DColon
t <- ty
return (Lchar c t)
, try $ parens $
do m <- optional (matchToken Dash)
ds <- number
matchToken DColon
t <- ty
return (Lint ds t)
, try $ parens $
do n <- number <|> parens (matchToken Dash >> number)
matchToken Percent
d <- number
matchToken DColon
t <- ty
return $ Lrational (n % d) t
]
tdef = choice [dataP, newtypeP]
dataP = do keyword "data"
name <- qtycon
tbinds <- many tbind
matchToken Equal
cdefs <- braces $ cdef `sepBy` matchToken SColon
return $ Data name tbinds cdefs
<?> "dataP"
newtypeP = do keyword "newtype"
name <- qtycon
coercion <- qtycon
tbinds <- many tbind
matchToken Equal
t <- ty
return $ Newtype name coercion tbinds t
cdef = do name <- qdcon
tbinds <- many (matchToken At >> tbind)
tys <- many aty
return $ Constr name tbinds tys
aty = choice
[ try $ liftM Tcon qtycon
, liftM Tvar tyvar
, parens ty
, do keyword "trans"
liftM2 TransCoercion aty aty
, do keyword "sym"
liftM SymCoercion aty
, do keyword "right"
liftM RightCoercion aty
, do keyword "left"
liftM LeftCoercion aty
, do keyword "unsafe"
liftM2 UnsafeCoercion aty aty
, do keyword "inst"
liftM2 InstCoercion aty aty
]
bty = do ts <- many1 aty
return $ foldl1 Tapp ts
ty = choice
[ do keyword "forall"
binds <- many1 tbind
matchToken Dot
t <- ty
return $ foldr Tforall t binds
, try $ do a <- bty
matchToken Arrow
b <- ty
return (Tapp (Tapp (Tcon tcArrow) a) b)
, bty
]
tbind = choice
[ do var <-tyvar
return $ (var, Klifted)
, parens $
do var <- tyvar
matchToken DColon
k <- kind
return (var, k)
]
vbind = parens $ do v <- lname
matchToken DColon
t <- ty
return ((L.empty,L.empty,v),t)
akind = choice [ matchToken Star >> return Klifted
, matchToken Hash >> return Kunlifted
, matchToken QuestionMark >> return Kopen
, parens $ kind ]
kind = choice [ try $ do atomic <- akind
matchToken Arrow
k <- kind
return (Karrow atomic k)
, try $ do a <- parens ty
matchToken Colon
matchToken Equal
matchToken Colon
b <- parens ty
return $ Keq a b
, akind ]
tyvar = lname
qdcon = qual uname
qtycon = qual uname
qvar = qual lname
qual a = do pkg <- pkgname
matchToken Colon
mod <- uname
matchToken Dot
t <- a
return (pkg, mod, t)
tycon = uname
pkgname = uname <|> lname
mident = uname
movePos p (Pos l c _) _ = setSourceLine (setSourceColumn p c) l
uname = tokenPrim (const "uname") movePos (\t -> case getToken t of Uname n -> Just n; _ -> Nothing)
lname = tokenPrim (const "lname") movePos (\t -> case getToken t of Lname n -> Just n; _ -> Nothing)
number = tokenPrim (const "number") movePos (\t -> case getToken t of Number n -> Just n; _ -> Nothing)
stringP = tokenPrim (const "string") movePos (\t -> case getToken t of String n -> Just n; _ -> Nothing)
charP = tokenPrim (const "char") movePos (\t -> case getToken t of Char n -> Just n; _ -> Nothing)
matchToken token
= tokenPrim (const $ show token) movePos (\t -> if getToken t == token then Just () else Nothing)
parens = between (matchToken LParen)
(matchToken RParen)
braces = between (matchToken LBrace)
(matchToken RBrace)
keyword txt = matchToken (Keyword txt)