{-# LANGUAGE NoMonomorphismRestriction, PatternGuards #-} module Language.Core.Parser ( parseModule ) where import Data.Char import Data.Ratio import Text.ParserCombinators.Parsec import Control.Monad import Debug.Trace import Language.Core.Syntax import Language.Core.Pretty import qualified Data.ByteString.Char8 as B import qualified Data.ByteString.Lazy.Char8 as L type LParser = GenParser (Pos Token) () data Pos a = Pos !Int !Int a deriving Show getToken (Pos _ _ token) = token mkPos line col token = Pos (fromIntegral line) (fromIntegral col) token data Token = Keyword String | Colon | DColon | SColon | Arrow | LParen | RParen | LBrace | RBrace | Dot | Equal | At | Star | Hash | Dash | Percent | QuestionMark | Backslash | Uname L.ByteString | Lname L.ByteString | String String | Char Char | Number Integer deriving (Show, Eq) lexer :: L.ByteString -> [Pos Token] lexer = worker 1 0 where worker line col inp = case L.uncons inp of Nothing -> [] Just ('\n',cs) -> worker (line+1) 0 cs Just ('"',cs) -> let (str,len,rest) = lex_string cs in mkPos line col (String str) : worker line (col+len+2) rest Just ('\'',cs) -> let (char, len, rest) = lex_char cs in mkPos line col (Char char) : worker line (col+len+2) (L.drop 1 rest) Just (c,cs) | isSpace c -> worker line (col+1) cs | Just (token,len,rest) <- findSymbol inp -> mkPos line col token : worker line (col+len) rest | isLetter c -> let (name, rest) = L.span isNameChar inp token = if isUpper c then Uname name else Lname name in mkPos line col token : worker line (col+L.length name) rest | isDigit c -> let (digits, rest) = L.span isDigit inp token = Number (read (L.unpack digits)) in mkPos line col token : worker line (col+L.length digits) rest | otherwise -> worker line (col+1) cs -- error $ "Unhandled: " ++ take 20 (L.unpack inp) lex_string = worker 0 "" where worker len acc inp = case L.uncons inp of Nothing -> error "lexer failure in string: eof" Just ('"',cs) -> (reverse acc, len, cs) _ -> let (c, l, cs) = lex_char inp in worker (len+l) (c:acc) cs lex_char inp = case L.uncons inp of Just ('\\',cs) -> case L.unpack (L.take 3 cs) of ['x',ah,al] | all isHexDigit [ah,al] -> (chr (digitToInt ah*16 + digitToInt al), 4, L.drop 3 cs) _ -> error "lexer failure in string: bad hex" Just (c,cs) | c >= '\x20' && c <= '\x7E' && c `notElem` ['\x22', '\x27', '\x5C'] -> (c, 1, cs) Just (c,cs) -> error $ "lexer failure in string: invalid char: " ++ show c isNameChar c = isUpper c || isLower c || isDigit c || c == '\'' findSymbol inp = worker symbols where worker [] = Nothing worker ((str,token):xs) = if str `L.isPrefixOf` inp then Just (token, L.length str, L.drop (L.length str) inp) else worker xs symbols = [ (L.pack "::", DColon) , (L.pack ":", Colon) , (L.pack ";", SColon) , (L.pack "(", LParen) , (L.pack ")", RParen) , (L.pack "{", LBrace) , (L.pack "}", RBrace) , (L.pack "->", Arrow) , (L.pack ".", Dot) , (L.pack "=", Equal) , (L.pack "@", At) , (L.pack "*", Star) , (L.pack "?", QuestionMark) , (L.pack "#", Hash) , (L.pack "-", Dash) , (L.pack "\\", Backslash) , (L.pack "%module", Keyword "module") , (L.pack "%data", Keyword "data") , (L.pack "%newtype", Keyword "newtype") , (L.pack "%rec", Keyword "rec") , (L.pack "%case", Keyword "case") , (L.pack "%forall", Keyword "forall") , (L.pack "%let", Keyword "let") , (L.pack "%note", Keyword "note") , (L.pack "%external", Keyword "external") , (L.pack "%dynexternal", Keyword "dynexternal") , (L.pack "%label", Keyword "label") , (L.pack "%_", Keyword "_") , (L.pack "%cast", Keyword "cast") , (L.pack "%left", Keyword "left") , (L.pack "%right", Keyword "right") , (L.pack "%sym", Keyword "sym") , (L.pack "%unsafe", Keyword "unsafe") , (L.pack "%inst", Keyword "inst") , (L.pack "%trans", Keyword "trans") , (L.pack "%unsafe", Keyword "unsafe") , (L.pack "%of", Keyword "of") , (L.pack "%in", Keyword "in") , (L.pack "%", Percent) ] main :: IO () main = do inp <- L.readFile --"./hcr/Tuple.hcr" --"./src/ExternalCore.hcr" "../base.hcr" --print (length (lexer inp)) --mapM_ print (lexer inp) parseTest (many1 moduleP >>= \m -> notFollowedBy anyToken >> return (map ppModule m) >> return ()) (lexer inp) parseModule :: SourceName -> L.ByteString -> Either ParseError Module parseModule src inp = runParser moduleP () src (lexer inp) moduleP :: LParser Module moduleP = do keyword "module" pkg <- pkgname matchToken Colon modName <- mident trace (L.unpack modName) $ return () tdefs <- tdef `endBy` (matchToken SColon) vdefgs <- vdefg `endBy` (matchToken SColon) return $ Module (pkg,modName) tdefs vdefgs vdefg = choice [ do keyword "rec" braces $ liftM Rec (vdef `sepBy1` (matchToken SColon)) , do v <- vdef return $ Nonrec v ] "vdefg" vdef = do name <- try qvar <|> do name <- lname; return (L.empty,L.empty,name) matchToken DColon t <- ty matchToken Equal e <- expP return (False, name, t, e) "vdef" expP = choice [ do fn <- aexp args <- many arg let app e (Left t) = Appt e t app e (Right a) = App e a return $ foldl app fn args , do matchToken Backslash binds <- many1 binder matchToken Arrow e <- expP return $ foldr Lam e binds , do keyword "case" t <- aty e <- expP keyword "of" b <- vbind alts <- braces $ alt `sepBy1` (matchToken SColon) return $ Case e b t alts , do keyword "cast" e <- aexp t <- aty return $ Cast e t , do keyword "let" def <- vdefg keyword "in" e <- expP return $ Let def e , do keyword "note" note <- stringP e <- expP return $ Note note e , do keyword "external" conv <- lname target <- stringP t <- aty return $ External target (L.unpack conv) t , do keyword "dynexternal" conv <- lname t <- aty return $ DynExternal (L.unpack conv) t , do keyword "label" str <- stringP return $ Label str ] alt = choice [ do con <- qdcon tbinds <- many (matchToken At >> tbind) vbinds <- many vbind matchToken Arrow e <- expP return $ Acon con tbinds vbinds e , do keyword "_" matchToken Arrow e <- expP return $ Adefault e , do l <- lit matchToken Arrow e <- expP return $ Alit l e ] "alt" binder = choice [ do matchToken At liftM Tb tbind , liftM Vb vbind ] "binder" arg = choice [ do matchToken At liftM Left aty , liftM Right aexp ] "arg" aexp = choice [ try $ liftM Dcon qdcon , try $ liftM Var qvar , try $ liftM Var (lname >>= \name -> return (L.empty,L.empty,name)) , try $ liftM Lit lit , parens expP ] lit = choice [ try $ parens $ do cs <- stringP matchToken DColon t <- ty return (Lstring cs t) , try $ parens $ do c <- charP matchToken DColon t <- ty return (Lchar c t) , try $ parens $ do m <- optional (matchToken Dash) ds <- number matchToken DColon t <- ty return (Lint ds t) , try $ parens $ do n <- number <|> parens (matchToken Dash >> number) matchToken Percent d <- number matchToken DColon t <- ty return $ Lrational (n % d) t ] tdef = choice [dataP, newtypeP] dataP = do keyword "data" name <- qtycon tbinds <- many tbind matchToken Equal cdefs <- braces $ cdef `sepBy` matchToken SColon return $ Data name tbinds cdefs "dataP" newtypeP = do keyword "newtype" name <- qtycon coercion <- qtycon tbinds <- many tbind matchToken Equal t <- ty return $ Newtype name coercion tbinds t cdef = do name <- qdcon tbinds <- many (matchToken At >> tbind) tys <- many aty return $ Constr name tbinds tys aty = choice [ try $ liftM Tcon qtycon , liftM Tvar tyvar , parens ty , do keyword "trans" liftM2 TransCoercion aty aty , do keyword "sym" liftM SymCoercion aty , do keyword "right" liftM RightCoercion aty , do keyword "left" liftM LeftCoercion aty , do keyword "unsafe" liftM2 UnsafeCoercion aty aty , do keyword "inst" liftM2 InstCoercion aty aty ] bty = do ts <- many1 aty return $ foldl1 Tapp ts ty = choice [ do keyword "forall" binds <- many1 tbind matchToken Dot t <- ty return $ foldr Tforall t binds , try $ do a <- bty matchToken Arrow b <- ty return (Tapp (Tapp (Tcon tcArrow) a) b) , bty ] tbind = choice [ do var <-tyvar return $ (var, Klifted) , parens $ do var <- tyvar matchToken DColon k <- kind return (var, k) ] vbind = parens $ do v <- lname matchToken DColon t <- ty return (v,t) akind = choice [ matchToken Star >> return Klifted , matchToken Hash >> return Kunlifted , matchToken QuestionMark >> return Kopen , parens $ kind ] kind = choice [ try $ do atomic <- akind matchToken Arrow k <- kind return (Karrow atomic k) , akind ] tyvar = lname qdcon = qual uname qtycon = qual uname qvar = qual lname qual a = do pkg <- pkgname matchToken Colon mod <- uname matchToken Dot t <- a return (pkg, mod, t) tycon = uname pkgname = uname <|> lname mident = uname --namechar = lower <|> upper <|> digit <|> char '\'' movePos p (Pos l c _) _ = setSourceLine (setSourceColumn p c) l uname = tokenPrim (const "uname") movePos (\t -> case getToken t of Uname n -> Just n; _ -> Nothing) lname = tokenPrim (const "lname") movePos (\t -> case getToken t of Lname n -> Just n; _ -> Nothing) number = tokenPrim (const "number") movePos (\t -> case getToken t of Number n -> Just n; _ -> Nothing) stringP = tokenPrim (const "string") movePos (\t -> case getToken t of String n -> Just n; _ -> Nothing) charP = tokenPrim (const "char") movePos (\t -> case getToken t of Char n -> Just n; _ -> Nothing) matchToken token = tokenPrim (const $ show token) movePos (\t -> if getToken t == token then Just () else Nothing) -- Utilities parens = between (matchToken LParen) (matchToken RParen) braces = between (matchToken LBrace) (matchToken RBrace) keyword txt = matchToken (Keyword txt)