{-# language DataKinds #-} {-# language FlexibleContexts #-} {-# language MultiParamTypeClasses, FlexibleInstances #-} {-| Module : Language.Python.Parse Copyright : (C) CSIRO 2017-2019 License : BSD3 Maintainer : Isaac Elliott Stability : experimental Portability : non-portable -} module Language.Python.Parse ( module Data.Validation , module Language.Python.Parse.Error , Parser -- * Parsing some 'Text' , parseModule , parseStatement , parseExpr , parseExprList -- * Parsing from a file , readModule , readStatement , readExpr , readExprList -- * Source Information , SrcInfo(..), initialSrcInfo ) where import Control.Applicative ((<|>)) import Data.Bifunctor (first) import Data.List.NonEmpty (NonEmpty) import Data.Text (Text) import Data.Validation import Text.Megaparsec (eof) import qualified Data.Text.IO as Text import Language.Python.Internal.Lexer ( SrcInfo(..), initialSrcInfo, withSrcInfo , tokenize, insertTabs ) import Language.Python.Internal.Token (PyToken) import Language.Python.Internal.Parse ( Parser, runParser, level, module_, statement, exprOrStarList , expr, space ) import Language.Python.Internal.Syntax.IR (AsIRError) import Language.Python.Parse.Error import Language.Python.Syntax.Ann import Language.Python.Syntax.Expr (Expr) import Language.Python.Syntax.Module (Module) import Language.Python.Syntax.Statement (Statement) import Language.Python.Syntax.Whitespace (Indents (..)) import qualified Language.Python.Internal.Syntax.IR as IR -- | Parse a module -- -- https://docs.python.org/3/reference/toplevel_components.html#file-input parseModule :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File name -> Text -- ^ Input to parse -> Validation (NonEmpty e) (Module '[] SrcInfo) parseModule fp input = let si = initialSrcInfo fp ir = do tokens <- tokenize fp input tabbed <- insertTabs si tokens runParser fp module_ tabbed in fromEither (first pure ir) `bindValidation` IR.fromIR -- | Parse a statement -- -- https://docs.python.org/3/reference/compound_stmts.html#grammar-token-statement parseStatement :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File name -> Text -- ^ Input to parse -> Validation (NonEmpty e) (Statement '[] SrcInfo) parseStatement fp input = let si = initialSrcInfo fp ir = do tokens <- tokenize fp input tabbed <- insertTabs si tokens runParser fp ((statement tlIndent =<< tlIndent) <* eof) tabbed in fromEither (first pure ir) `bindValidation` IR.fromIR_statement where tlIndent = level <|> withSrcInfo (pure $ Indents [] . Ann) -- | Parse an expression list (unparenthesised tuple) -- -- https://docs.python.org/3.5/reference/expressions.html#grammar-token-expression_list parseExprList :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File name -> Text -- ^ Input to parse -> Validation (NonEmpty e) (Expr '[] SrcInfo) parseExprList fp input = let si = initialSrcInfo fp ir = do tokens <- tokenize fp input tabbed <- insertTabs si tokens runParser fp (exprOrStarList space <* eof) tabbed in fromEither (first pure ir) `bindValidation` IR.fromIR_expr -- | Parse an expression -- -- https://docs.python.org/3.5/reference/expressions.html#grammar-token-expression parseExpr :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File name -> Text -- ^ Input to parse -> Validation (NonEmpty e) (Expr '[] SrcInfo) parseExpr fp input = let si = initialSrcInfo fp ir = do tokens <- tokenize fp input tabbed <- insertTabs si tokens runParser fp (expr space <* eof) tabbed in fromEither (first pure ir) `bindValidation` IR.fromIR_expr -- | Parse a module from a file -- -- https://docs.python.org/3/reference/toplevel_components.html#file-input readModule :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File to read -> IO (Validation (NonEmpty e) (Module '[] SrcInfo)) readModule fp = parseModule fp <$> Text.readFile fp -- | Parse a statement from a file -- -- https://docs.python.org/3/reference/compound_stmts.html#grammar-token-statement readStatement :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File to read -> IO (Validation (NonEmpty e) (Statement '[] SrcInfo)) readStatement fp = parseStatement fp <$> Text.readFile fp -- | Parse an expression from a file -- -- https://docs.python.org/3.5/reference/expressions.html#grammar-token-expression readExpr :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File to read -> IO (Validation (NonEmpty e) (Expr '[] SrcInfo)) readExpr fp = parseExpr fp <$> Text.readFile fp -- | Parse an expression list (unparenthesised tuple) from a file -- -- https://docs.python.org/3.5/reference/expressions.html#grammar-token-expression_list readExprList :: ( AsLexicalError e Char , AsTabError e SrcInfo , AsIncorrectDedent e SrcInfo , AsParseError e (PyToken SrcInfo) , AsIRError e SrcInfo ) => FilePath -- ^ File to read -> IO (Validation (NonEmpty e) (Expr '[] SrcInfo)) readExprList fp = parseExprList fp <$> Text.readFile fp