-- | Abstract syntax tree and pretty-printing for Python. -- Works for Python 2 and 3. -- A lot of the data structures are inspired by the language-python package; -- I have chosen not to have language-python as a dependency of sifflet-lib, -- however, because it would be overkill and still allows to little control -- over pretty-printing of Python expressionsw. module Sifflet.Foreign.Python (PModule(..) , PStatement(..) , PExpr(..) , PIdentifier(..) , PParameter(..) , POperator(..) , Precedence , alterParens , atomic , compound , ret , condS , condE , var , ident , pInt , pFloat , bool , char , string , paren , noParens , fullParens , bestParens , simplifyParens , par , unpar , call , param , fun , opTimes , opIDiv , opFDiv , opMod , opPlus , opMinus , opEq , opNe , opGt , opGe , opLt , opLe ) where import Sifflet.Text.Pretty -- | The class of types that can be parenthesized, that is, -- they may contain parentheses, and their parentheses may be altered. -- class Parenthesize a where -- alterParens :: (PExpr -> PExpr) -> a -> a -- ^^ Don't need a class for this! -- This doesn't seem right. It is too general. -- instance (Pretty a) => Pretty [a] where -- pretty as = sepCommaSp (map pretty as) prettyParens :: (Pretty a) => [a] -> String prettyParens = prettyList "(" ", " ")" prettyBrackets :: (Pretty a) => [a] -> String prettyBrackets = prettyList "[" ", " "]" -- | Python module -- essentially a list of statements; -- should it also have a name? data PModule = PModule [PStatement] deriving (Eq, Show) instance Pretty PModule where pretty (PModule ss) = sepLines2 (map pretty ss) -- | Python statement data PStatement = PReturn PExpr | PImport String -- ^ import statement | PCondS PExpr PStatement PStatement -- ^ if condition action alt-action | PFun PIdentifier [PParameter] PStatement -- ^ function name, formal parameters, body deriving (Eq, Show) instance Pretty PStatement where pretty s = case s of PReturn e -> "return " ++ pretty e PImport modName -> "import " ++ modName PCondS c a b -> sepLines ["if " ++ pretty c ++ ":", indentLine 4 (pretty a), "else:", indentLine 4 (pretty b)] PFun fid params body -> sepLines ["def " ++ pretty fid ++ prettyParens params ++ ":", indentLine 4 (pretty body)] -- | Python expression data PExpr = PCondE PExpr PExpr PExpr -- ^ if: condition, value, alt-value | PParen PExpr -- ^ expression in parentheses; is this needed? | PCall PExpr [PExpr] -- ^ function call: function expression (typically a PVariable), argument expressions | POperate POperator PExpr PExpr -- ^ binary operation: operator, left, right -- base cases | PVariable PIdentifier -- ^ variable identifier | PInt Integer | PFloat Double | PBool Bool | PString String deriving (Eq, Show) -- | PExpr as an instance of Pretty. -- The POperate case needs work to deal with precedences -- and avoid unnecessary parens instance Pretty PExpr where pretty pexpr = case pexpr of PCondE c a b -> unwords [pretty a, "if", pretty c, "else", pretty b] PParen e -> prettyParens [e] PVariable vid -> pretty vid PInt i -> show i PFloat x -> show x PBool b -> show b PString s -> show s PCall fexpr argExprs -> concat [pretty fexpr, prettyParens argExprs] POperate op left right -> unwords [pretty left, pretty op, pretty right] -- | Python identifier (variable name, etc.) data PIdentifier = PIdentifier String deriving (Eq, Show) instance Pretty PIdentifier where pretty (PIdentifier s) = s -- | Python function formal parameter data PParameter = PParameter PIdentifier deriving (Eq, Show) instance Pretty PParameter where pretty (PParameter pident) = pretty pident -- | Python operator, such as * or + data POperator = POperator {opName :: String, opPrec :: Precedence, opAssoc :: Bool -- ^ associative? } deriving (Eq, Show) instance Pretty POperator where pretty (POperator s _ _) = s -- | Operator priority, actually should be > 0 or >= 0 type Precedence = Int -- | Alter the parentheses of a statement by applying a -- transformer t to the expressions in the statement. alterParens :: (PExpr -> PExpr) -> PStatement -> PStatement alterParens t s = case s of PReturn e -> PReturn (t e) PCondS c a b -> PCondS (t c) (alterParens t a) (alterParens t b) PFun fid params b -> PFun fid params (alterParens t b) _ -> s atomic :: PExpr -> Bool atomic pexpr = case pexpr of PVariable _ -> True PInt _ -> True PFloat _ -> True PBool _ -> True PString _ -> True _ -> False compound :: PExpr -> Bool compound = not . atomic -- | Python return statement ret :: PExpr -> PStatement ret pexpr = PReturn pexpr -- | Python if STATEMENT -- This is the if STATEMENT: -- if c: -- a -- else: -- b -- -- But do I need this at all? condS :: PExpr -> PExpr -> PExpr -> PStatement condS c a b = PCondS c (ret a) (ret b) -- | Python if EXPRESSION -- This is the if EXPRESSION: -- "a if c else b", which means (in Haskell) "if c then a else b". -- I didn't even know there was such a thing! -- It works in both Python 2.6.5 and 3.1.2. condE :: PExpr -> PExpr -> PExpr -> PExpr condE c a b = PCondE c a b -- paren (PCondE c a b) -- PExpr smart constructors -- | Python variable var :: String -> PExpr var name = PVariable (PIdentifier name) -- | Python identifier ident :: String -> PIdentifier ident s = PIdentifier s -- | Python integer expression pInt :: Integer -> PExpr pInt i = PInt i -- | Python float expression pFloat :: Double -> PExpr pFloat x = PFloat x -- | Python boolean expression bool :: Bool -> PExpr bool b = PBool b -- | Python character expression = string expression with one character char :: Char -> PExpr char c = string [c] -- | Python string expression string :: String -> PExpr string s = PString s -- | Python expression in parentheses. -- Wraps parentheses around an expression. -- This is needed (at least sometimes!) -- in calls and binary operator applications. -- Also in condE! -- I'm doing it always to be safe (but ugly, not pretty!!) paren :: PExpr -> PExpr paren pexpr = PParen pexpr -- | Remove all grouping parentheses in expression. -- Does not affect parentheses required for function arguments -- or parameters. -- This will sometimes alter the semantics. -- I don't need noParens; it's just here as an exercise noParens :: PExpr -> PExpr noParens pexpr = let t = noParens in case pexpr of PParen e -> t e PCondE c a b -> PCondE (t c) (t a) (t b) PCall fe aes -> PCall (t fe) (map t aes) POperate op left right -> POperate op (t left) (t right) -- remaining cases are simple and therefore have no parens _ -> pexpr -- | Wrap each subexpression in grouping parentheses. -- This will typically look like too many parentheses. -- I don't need fullParens; it's just here as an exercise fullParens :: PExpr -> PExpr fullParens pexpr = let t = paren . fullParens in case pexpr of PCondE c a b -> PCondE (t c) (t a) (t b) PCall fe aes -> PCall (t fe) (map t aes) POperate op left right -> POperate op (t left) (t right) -- PParen and base cases need no more ()'s _ -> pexpr -- | Use parentheses for grouping where needed, -- but cautiously, erring on the side of extra parentheses if not sure -- they can be removed. bestParens :: PExpr -> PExpr bestParens = simplifyParens . fullParens -- | Remove grouping parentheses that are provably not needed. -- This may not remove *all* unnecessary grouping parentheses. -- You can always add more cases to make it better! simplifyParens :: PExpr -> PExpr simplifyParens pexpr = let t = simplifyParens ut = unpar . t in case pexpr of PParen e -> -- 1. Atomic expressions, like 5, do not need parens, -- because there is nothing to be grouped if atomic e then e else case e of -- function call (fact(n)) -> fact(n) PCall _ _ -> ut e _ -> PParen (t e) PCondE c a b -> PCondE (ut c) (ut a) (ut b) PCall fe aes -> PCall (t fe) (map ut aes) POperate op left right -> sop (POperate op (t left) (t right)) -- remaining cases are simple and therefore have no parens _ -> pexpr -- | Various rules for removing extra parentheses in operations. -- Probably incomplete. If the PExpr is not an operation, then -- it is passed through without change. sop :: PExpr -> PExpr sop = sopLeft . sopRight sopLeft :: PExpr -> PExpr sopLeft pexpr = case pexpr of POperate op1 (PParen (POperate op2 left2 right2)) right -> if opPrec op2 > opPrec op1 -- higher precedcence in left subtree -- e.g. (a * b) + c ==> a * b + c then POperate op1 (POperate op2 left2 right2) right else if opPrec op2 == opPrec op1 -- equal precedence operations, left to right -- e.g. (a + b) - c ==> a + b - c then POperate op1 (POperate op2 left2 right2) right else pexpr _ -> pexpr sopRight :: PExpr -> PExpr sopRight pexpr = case pexpr of POperate op1 left (PParen (POperate op2 left2 right2)) -> if opPrec op2 > opPrec op1 -- higher precedcence in left subtree -- e.g. (a * b) + c ==> a * b + c then POperate op1 left (POperate op2 left2 right2) else if op1 == op2 && opAssoc op1 -- associative operation, e.g. -- a + (b + c) ==> a + b + c then POperate op1 left (POperate op2 left2 right2) else pexpr _ -> pexpr -- | Adding and removing top-level parentheses. -- Axioms: par (unpar e) == e; unpar (par e) == e. -- | Add parentheses around an expression. Top level only. par :: PExpr -> PExpr par e = PParen e -- | Remove parentheses around an expression. Top level only. unpar :: PExpr -> PExpr unpar pexpr = case pexpr of PParen e -> e _ -> pexpr -- no-op -- | The "operator precedence" of an expression. -- If the expression is an operation, then this is the -- precedence of its operator; -- otherwise, it's not clear what it should be, but for now, -1. exprPrec :: PExpr -> Precedence exprPrec pexpr = case pexpr of POperate op _ _ -> opPrec op _ -> (-1) -- | Python function call expression call :: String -> [PExpr] -> PExpr call fname argExprs = PCall (var fname) argExprs -- arg :: PExpr -> PArgument -- arg expr = ArgExpr {arg_expr = expr, arg_annot = ()} -- | Python function formal parameter param :: String -> PParameter param name = PParameter (ident name) -- | Defines function definition fun :: String -> [String] -> PExpr -> PStatement fun fname paramNames bodyExpr = PFun (ident fname) (map param paramNames) (ret bodyExpr) -- | Binary operators -- Precedence levels are rather *informally* described in -- The Python Language Reference, -- http://docs.python.org/reference/. -- I am adopting the infixr levels from Haskell, -- which seem to be consistent with Python, -- at least for the operators that Sifflet uses. -- | Arithmetic operators -- + and - have lower precedence than *, /, //, % opTimes, opIDiv, opFDiv, opMod, opPlus, opMinus :: POperator opTimes = POperator "*" 7 True opIDiv = POperator "//" 7 False opFDiv = POperator "/" 7 False opMod = POperator "%" 7 False opPlus = POperator "+" 6 True opMinus = POperator "-" 6 False -- | Comparison operators have precedence lower than any arithmetic -- operator. Here, I've specified associative = False, -- because association doesn't even make sense; -- (a == b) == c is in general not well typed. opEq, opNe, opGt, opGe, opLt, opLe :: POperator opEq = POperator "==" 4 False opNe = POperator "!=" 4 False opGt = POperator ">" 4 False opGe = POperator ">=" 4 False opLt = POperator "<" 4 False opLe = POperator "<=" 4 False