{-# LANGUAGE DataKinds #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE NamedFieldPuns #-} {-# LANGUAGE OverloadedRecordDot #-} -- | This module contains functions that translate Haskell quasiquotes into -- internal representations of languages and passes as defined in "Nanopass.Internal.Representation". -- This is done by first parsing an s-expression with "Text.SExpression", -- and then recursively recognizing the components of that s-expression. -- -- The primary entry points are 'parseLanguage' and TODO 'parsePass`. -- Other recognizers are exported to serve as the promary source for -- documentation about the grammar used in these quasiquotes. module Nanopass.Internal.Parser ( -- * Recognizers ParseResult , parseLanguage -- ** Base Languages , parseBaseLanguage , parseNonterm , parseProduction -- ** Language Modification , parseLangMod , parseNontermsEdit , parseProductionsEdit -- ** Shared Recognizers , parseLangLHS , parseNontermBody , parseProductionBody , parseType -- * Passes , parsePass -- * S-Expressions , getSexpr , Loc(..) , toUpColonName ) where import Nanopass.Internal.Representation import Control.Monad (forM) import Data.Functor ((<&>)) import Nanopass.Internal.Error (Error(..)) import Text.Megaparsec (runParser',State(..),PosState(..),SourcePos(..),errorBundlePretty) import Text.Megaparsec.Char (space1) import Text.Megaparsec.Pos (defaultTabWidth,mkPos) import Text.SExpression (SExpr(..),Parser,parseSExpr,def) import qualified Data.Map as Map import qualified Text.Megaparsec as P import qualified Text.Megaparsec.Char.Lexer as P ------------------------------ ------ Glue it together ------ ------------------------------ type ParseResult = Either (Language 'Unvalidated UpName) -- ^ base language defintion LangMod -- ^ modifications to a language -- | @ -- Language ::= \ | \ -- @ -- -- * for @BaseLang@, see 'parseBaseLanguage' -- * for @LangMod@, see 'parseLangMod' parseLanguage :: (Loc, String) -> Either Error ParseResult parseLanguage inp@(_, orig) = do sexpr <- getSexpr inp case sexpr of List (_:Atom "from":_) -> Right <$> parseLangMod orig sexpr _ -> Left <$> parseBaseLanguage orig sexpr --------------------------------- ------ Recognize Languages ------ --------------------------------- -- | @ -- BaseLang ::= -- (\ language name and type variables -- \ documentation -- \) syntactic categories -- @ -- -- * for @LangLHS@, see 'parseLangLHS' -- * for @Nonterm@, see 'parseNonterm' parseBaseLanguage :: String -> SExpr -> Either Error (Language 'Unvalidated UpName) parseBaseLanguage originalProgram (List (lhs:rest)) = do (name, langParams) <- parseLangLHS lhs let langName = SourceName name (docs, nonterms_) = spanDocstrs rest nontermList <- parseNonterm `mapM` nonterms_ let nonterms = Map.fromList $ nontermList <&> \s -> (s.nontermName.name, s) pure Language { langName , langInfo = LanguageInfo { langParams = SourceName <$> langParams , nonterms , originalProgram = Just originalProgram , baseDefdLang = Nothing } } parseBaseLanguage _ other = Left $ ExpectingLanguage other -- | @ -- LangLHS ::= \ language name, zero type variables -- | (\ \) language name, type variables -- @ -- -- * for @UpCase@, see 'toUpName' -- * for @LowCase@, see 'toLowName' parseLangLHS :: SExpr -> Either Error (UpName, [LowName]) parseLangLHS (Atom str) = case toUpName str of Just name -> pure (name, []) Nothing -> Left $ ExpectedLangName str parseLangLHS (List (Atom str:rest)) = do name <- case toUpName str of Just name -> pure name Nothing -> Left $ ExpectedLangName str tyVars <- forM rest $ \case Atom tvStr | Just tvName <- toLowName tvStr -> pure tvName it -> Left $ ExpectingTypeVariable it pure (name, tyVars) parseLangLHS it = Left $ ExpectedLangLHS it -- | @ -- Nonterm ::= -- (\ type name -- \ documentation -- \) constructor arguments -- @ -- -- * for @UpCase@, see 'toUpName' -- * for @Production@, see 'parseProduction' parseNonterm :: SExpr -> Either Error (Nonterm 'Unvalidated) parseNonterm (List (Atom str:rest)) = do nontermName <- case toUpName str of Just name -> pure name Nothing -> Left $ ExpectedNontermName (Just $ Atom str) parseNontermBody nontermName rest parseNonterm (List (other:_)) = Left $ ExpectedNontermName (Just other) parseNonterm other = Left $ ExpectedNonterm other -- | Separated out from 'parseNonterm' because it is useful in 'parseNontermsEdit' as well. parseNontermBody :: UpName -> [SExpr] -> Either Error (Nonterm 'Unvalidated) parseNontermBody nontermName rest = do let (docs, prods_) = spanDocstrs rest productionList <- parseProduction `mapM` prods_ let productions = Map.fromList $ productionList <&> \p -> (p.prodName.name, p) pure Nonterm { nontermName = SourceName nontermName , productions } -- | @ -- Production ::= -- (\ constructor name -- \ documentation -- \) constructor arguments -- @ -- -- * for @UpCase@, see 'toUpName' -- * for @Type@, see 'parseType' parseProduction :: SExpr -> Either Error (Production 'Unvalidated) parseProduction (List (Atom ctorStr:rest)) = do prodName <- case toUpName ctorStr of Just name -> pure name Nothing -> Left $ ExpectedConstructorName (Just $ Atom ctorStr) parseProductionBody prodName rest parseProduction other = Left $ ExpectedProduction other -- | Separated out from 'parseProduction' because it is useful in 'parseProductionsEdit' as well. parseProductionBody :: UpName -> [SExpr] -> Either Error (Production 'Unvalidated) parseProductionBody prodName rest = do let (docs, args) = spanDocstrs rest subterms <- parseType `mapM` args pure Production { prodName = SourceName prodName , subterms } -- | @ -- Type ::= \ type parameter -- | \ plain Haskell type (kind *) -- or non-terminal (language parameters already applied) -- | (\ \) plain Haskell type application -- | (\'?\' \) Maybe type -- | (\'*\' \) List type -- | (\'+\' \) NonEmpty type -- | () | (\'&\') unit type -- | (\'&\' \) Only type TODO -- | (\'&\' \ \ \) tuple types -- @ -- -- -- * for @UpCase@, see 'toUpName' -- * for @LowCase@, see 'toLowName' -- * for @UpColonCase@, see 'toUpColonName' parseType :: SExpr -> Either Error (TypeDesc 'Unvalidated) parseType = \case Atom str | Just name <- toUpColonName str -> pure $ CtorType (SourceName name) [] | Just name <- toLowName str -> pure $ VarType (SourceName name) | otherwise -> Left $ ExpectingTypeNameOrVar str List [] -> pure UnitType List [Atom "?", x] -> MaybeType <$> parseType x List [Atom "*", x] -> ListType <$> parseType x List [Atom "+", x] -> NonEmptyType <$> parseType x List (Atom "&" : xs_) -> case xs_ of [] -> pure UnitType [x] -> parseType x -- TODO Data.Tuple.Only (x1:x2:xs) -> do t1 <- parseType x1 t2 <- parseType x2 ts <- parseType `mapM` xs pure $ TupleType t1 t2 ts List (x:xs) -> do ctor <- case x of Atom str | Just name <- toUpColonName str -> pure name _ -> Left $ ExpectedTypeConstructor x ts <- parseType `mapM` xs pure $ CtorType (SourceName ctor) ts x@(ConsList _ _) -> Left $ ConsListsDisallowed x x@(Number _) -> Left $ UnexpectedLiteral x x@(String _) -> Left $ UnexpectedLiteral x x@(Bool _) -> Left $ UnexpectedLiteral x spanDocstrs :: [SExpr] -> ([String], [SExpr]) spanDocstrs = loop [] where loop acc (String str:rest) = loop (str:acc) rest loop acc rest = (reverse acc, rest) ----------------------------------- ------ Language Modification ------ ----------------------------------- -- | @ -- LangMod ::= -- (\ new language name and type variables -- \'from\' keyword -- \ base language name -- \ documentation -- \) changes to the base language's syntactic categories -- @ -- -- * for @LangLHS@, see 'parseLangLHS' -- * for @UpColon@, see 'toUpColonName' -- * for @NontermsEdit@, see 'parseNontermsEdit' parseLangMod :: String -> SExpr -> Either Error LangMod parseLangMod originalModProgram (List (lhs:Atom "from":rest_)) = do (newLang, newParams) <- parseLangLHS lhs (baseLang, rest) <- case rest_ of (Atom str):rest | Just name <- toUpColonName str -> pure (name, rest) other:_ -> Left $ ExpectingBaseLanguage (Just other) _ -> Left $ ExpectingBaseLanguage Nothing let (docs, edits_) = spanDocstrs rest edits <- parseNontermsEdit `mapM` edits_ pure LangMod { baseLang , newLang , newParams = SourceName <$> newParams , nontermsEdit = edits , originalModProgram = Just originalModProgram } parseLangMod _ other = Left $ ExpectingKwFromAfterLHS other -- | @ -- NontermsEdit -- ::= (\'+\' add a syntactic category -- \ new non-terminal name -- \ documentation -- \) constructors -- | (\'-\' \) remove a syntactic category by name -- | (\'*\' modify a syntactic category's productions -- \ name of non-terminal to edit -- \) changes to the base language's non-terminal -- @ -- -- * for @UpCase@, see 'toUpName' -- * for @Production@, see 'parseProduction' -- * for @ProductionsEdit@, see 'parseProductionsEdit' parseNontermsEdit :: SExpr -> Either Error NontermsEdit parseNontermsEdit (List (Atom "+":rest_)) = do (nontermName, rest) <- case rest_ of (Atom str):rest | Just name <- toUpName str -> pure (name, rest) other:_ -> Left $ ExpectedNontermName (Just other) [] -> Left $ ExpectedNontermName Nothing nonterm <- parseNontermBody nontermName rest pure $ AddNonterm nonterm parseNontermsEdit (List (Atom "-":rest_)) = do (nontermName, rest) <- case rest_ of (Atom str):rest | Just name <- toUpName str -> pure (name, rest) other:_ -> Left $ ExpectedNontermName (Just other) [] -> Left $ ExpectedNontermName Nothing case rest of [] -> pure () x:_ -> Left $ UnexpectedSExprAfterDelete x pure $ DelNonterm nontermName parseNontermsEdit (List (Atom "*":rest_)) = do (nontermName, rest) <- case rest_ of (Atom str):rest | Just name <- toUpName str -> pure (name, rest) other:_ -> Left $ ExpectedNontermName (Just other) [] -> Left $ ExpectedNontermName Nothing edits <- parseProductionsEdit `mapM` rest pure $ ModNonterm nontermName edits parseNontermsEdit (List (other:_)) = Left $ ExpectingPlusMinusStar other parseNontermsEdit other = Left $ ExpectingNontermsEdit other -- | @ -- ProductionsEdit -- ::= (\'+\' add a production -- \ new constructor name -- \ documentation -- \) constructor arguments -- | (\'-\' \) remove a production by name -- @ -- -- * for @UpCase@, see 'toUpName' -- * for @Type@, see 'parseType' parseProductionsEdit :: SExpr -> Either Error ProductionsEdit parseProductionsEdit (List (Atom "+":rest_)) = do (prodName, rest) <- case rest_ of (Atom str):rest | Just name <- toUpName str -> pure (name, rest) other:_ -> Left $ ExpectedConstructorName (Just other) [] -> Left $ ExpectedConstructorName Nothing prod <- parseProductionBody prodName rest pure $ AddProd prod parseProductionsEdit (List (Atom "-":rest_)) = do (prodName, rest) <- case rest_ of (Atom str):rest | Just name <- toUpName str -> pure (name, rest) other:_ -> Left $ ExpectedConstructorName (Just other) [] -> Left $ ExpectedConstructorName Nothing case rest of [] -> pure () x:_ -> Left $ UnexpectedSExprAfterDelete x pure $ DelProd prodName parseProductionsEdit (List (other:_)) = Left $ ExpectingPlusMinus other parseProductionsEdit other = Left $ ExpectingProductionsEdit other --------------------------------- ------ Parse S-Expressions ------ --------------------------------- -- | This is a location type that should be sufficient to describe the start of a Template Haskell quasiquote. -- It is used in 'getSexpr' so that it can report errors from the actual source code location. data Loc = Loc { file :: FilePath , line :: Int , col :: Int } -- | This serves as an adapter between Template Haskell and whatever s-expression parser I decide to use. getSexpr :: (Loc, String) -> Either Error SExpr getSexpr (loc, inp) = case runParser' (sc *> parseSExpr def <* sc) state0 of (_, Left err) -> Left . SExprError $ errorBundlePretty err (_, Right sexpr) -> Right sexpr where sc :: Parser () sc = P.space space1 (P.skipLineComment ";") P.empty state0 = State { stateInput = inp , stateOffset = 0 , statePosState = PosState { pstateInput = inp , pstateOffset = 0 , pstateSourcePos = SourcePos { sourceName = loc.file , sourceLine = mkPos loc.line , sourceColumn = mkPos loc.col } , pstateTabWidth = defaultTabWidth , pstateLinePrefix = "" } , stateParseErrors = [] } -- | Since sexprs don't allow dot in names, we use colon instead. -- We just immediately translate it over into dots. -- -- That is, accept strings matching @[A-Z][a-zA-Z0-9_]\(:[A-Z][a-zA-Z0-9_])*@, -- and alter them with @s\/\\.\/:\/@. toUpColonName :: String -> Maybe UpDotName toUpColonName = toUpDotName . map (\c -> if c == ':' then '.' else c) -------------------------- ------ Parse Passes ------ -------------------------- -- | @ -- Pass -- ::= (\'from\' \ source lagnuage name -- \'to\' \ target lagnuage name -- \ documentation -- @ -- -- * for @UpColonCase@, see 'toUpColonName' parsePass :: (Loc, String) -> Either Error Pass parsePass inp = parsePassSexpr =<< getSexpr inp parsePassSexpr :: SExpr -> Either Error Pass parsePassSexpr (List (Atom"from":l1:Atom "to":l2:rest)) = do sourceLang <- case l1 of Atom str | Just name <- toUpColonName str -> pure $ SourceName name _ -> Left $ ExpectedUpDotNameAfterFrom l1 targetLang <- case l2 of Atom str | Just name <- toUpColonName str -> pure $ SourceName name _ -> Left $ ExpectedUpDotNameAfterTo l2 let (docs, after) = spanDocstrs rest case after of [] -> pure () _ -> Left $ UnexpectedSExprAfterPass sourceLang.name targetLang.name pure Pass { sourceLang , targetLang } parsePassSexpr other = Left $ MissingFromTo other