{-# LANGUAGE EmptyDataDecls , OverloadedStrings , StandaloneDeriving , GeneralizedNewtypeDeriving , NoMonomorphismRestriction #-} {-| Bash statements and expressions. The statement tree is a functor, supporting arbitrary annotations; this is intended to support analysis of effects and privilege levels as well as commenting and arbitrary code inclusion. -} module Language.Bash.Syntax where import Prelude hiding (all, any, elem) import Control.Applicative import Control.Arrow ((***)) import Control.Monad import Data.Char import Data.String import Data.Maybe import Data.Word (Word8) import Data.ByteString.Char8 import Data.Foldable hiding (all, any, elem) import Data.Monoid import qualified Text.ShellEscape as Esc {-| The 'Annotated' type captures the annotatedness of a tree of Bash statements. It is 'Foldable' and a 'Functor'. -} data Annotated t = Annotated { annotation :: t , statement :: Statement t } deriving instance (Eq t) => Eq (Annotated t) deriving instance (Ord t) => Ord (Annotated t) deriving instance (Show t) => Show (Annotated t) instance Functor Annotated where fmap f (Annotated t stmt) = Annotated (f t) (fmap f stmt) instance Foldable Annotated where foldMap f (Annotated t stmt) = f t `mappend` foldMap f stmt {-| The 'Statement' type captures the different kind of statements that may exist in a Bash statement tree. It is mutually recursive with 'Annotated'. It is a 'Foldable' and a 'Functor'. -} data Statement t = Empty | SimpleCommand (Expression t) [Expression t] | NoOp ByteString | Bang (Annotated t) | AndAnd (Annotated t) (Annotated t) | OrOr (Annotated t) (Annotated t) | Pipe (Annotated t) (Annotated t) | Sequence (Annotated t) (Annotated t) | Background (Annotated t) (Annotated t) | Group (Annotated t) | Subshell (Annotated t) | Function FuncName (Annotated t) | IfThen (Annotated t) (Annotated t) | IfThenElse (Annotated t) (Annotated t) (Annotated t) | For Identifier [Expression t] (Annotated t) | Case (Expression t) [(Expression t, Annotated t)] | While (Annotated t) (Annotated t) | Until (Annotated t) (Annotated t) -- BraceBrace (ConditionalExpression t) | Assign (Assignment t) | Declare (Assignment t) | Local (Assignment t) | Export Identifier (Expression t) | IsSet VarName | ArrayUpdate Identifier (Expression t) (Expression t) | DictUpdate Identifier (Expression t) (Expression t) | Redirect (Annotated t) Redirection FileDescriptor (Either (Expression t) FileDescriptor) deriving instance (Eq t) => Eq (Statement t) deriving instance (Ord t) => Ord (Statement t) deriving instance (Show t) => Show (Statement t) instance Functor Statement where fmap f stmt = case stmt of Empty -> Empty SimpleCommand cmd args -> SimpleCommand (f' cmd) (fmap f' args) NoOp b -> NoOp b Bang ann -> Bang (f' ann) AndAnd ann ann' -> AndAnd (f' ann) (f' ann') OrOr ann ann' -> OrOr (f' ann) (f' ann') Pipe ann ann' -> Pipe (f' ann) (f' ann') Sequence ann ann' -> Sequence (f' ann) (f' ann') Background ann ann' -> Background (f' ann) (f' ann') Group ann -> Group (f' ann) Subshell ann -> Subshell (f' ann) Function fname ann -> Function fname (f' ann) IfThen ann ann' -> IfThen (f' ann) (f' ann') IfThenElse a a' a'' -> IfThenElse (f' a) (f' a') (f' a'') For ident args ann -> For ident (fmap f' args) (f' ann) Case expr cases -> Case (f' expr) (fmap (f' *** f') cases) While ann ann' -> While (f' ann) (f' ann') Until ann ann' -> Until (f' ann) (f' ann') -- BraceBrace (ConditionalExpression t) Assign a -> Assign (f' a) Declare a -> Declare (f' a) Local a -> Local (f' a) Export ident expr -> Export ident (f' expr) IsSet var -> IsSet var ArrayUpdate ident a b -> ArrayUpdate ident (f' a) (f' b) DictUpdate ident a b -> DictUpdate ident (f' a) (f' b) Redirect ann r fd chan -> Redirect (f' ann) r fd (fmapExprFD chan) where f' = fmap f fmapExprFD (Left expr) = Left (f' expr) fmapExprFD (Right fd) = Right fd instance Foldable Statement where foldMap f stmt = case stmt of Empty -> mempty SimpleCommand cmd args -> f' cmd `mappend` foldMap f' args NoOp _ -> mempty Bang ann -> f' ann AndAnd ann ann' -> f' ann `mappend` f' ann' OrOr ann ann' -> f' ann `mappend` f' ann' Pipe ann ann' -> f' ann `mappend` f' ann' Sequence ann ann' -> f' ann `mappend` f' ann' Background ann ann' -> f' ann `mappend` f' ann' Group ann -> f' ann Subshell ann -> f' ann Function _ ann -> f' ann IfThen ann ann' -> f' ann `mappend` f' ann' IfThenElse a a' a'' -> foldMap f' [a, a', a''] For _ args ann -> foldMap f' args `mappend` f' ann Case expr cases -> f' expr `mappend` foldMap foldMapPair cases While ann ann' -> f' ann `mappend` f' ann' Until ann ann' -> f' ann `mappend` f' ann' -- BraceBrace ConditionalExpression Assign a -> f' a Declare a -> f' a Local a -> f' a Export _ expr -> f' expr IsSet _ -> mempty ArrayUpdate _ a b -> f' a `mappend` f' b DictUpdate _ a b -> f' a `mappend` f' b Redirect ann _ _ chan -> f' ann `mappend` foldMapExprFD chan where f' = foldMap f foldMapExprFD (Left expr) = f' expr foldMapExprFD (Right _) = mempty foldMapPair (x, y) = f' x `mappend` f' y {-| The type of Bash expressions, handling many kinds of variable reference as well as eval and process substitution. It is 'Foldable' and a 'Functor'. -} data Expression t = Literal Esc.Bash | Asterisk | QuestionMark | Tilde | ReadVar VarName | ReadVarSafe VarName | ReadArray Identifier (Expression t) | ReadArraySafe Identifier (Expression t) | ARGVElements | ARGVLength | Elements Identifier | ElementsSafe Identifier | Keys Identifier | Length VarName | Trim Trim VarName (Expression t) | ArrayLength Identifier | Concat (Expression t) (Expression t) | Eval (Annotated t) | EvalUnquoted (Annotated t) | ProcessIn (Annotated t) | ProcessOut (Annotated t) -- TODO | IndirectExpansion Identifier -- TODO | Substring, Replacement, &c. deriving instance (Eq t) => Eq (Expression t) deriving instance (Ord t) => Ord (Expression t) deriving instance (Show t) => Show (Expression t) instance IsString (Expression t) where fromString = literal . fromString instance Functor Expression where fmap f expr = case expr of Literal esc -> Literal esc Asterisk -> Asterisk QuestionMark -> QuestionMark Tilde -> Tilde ReadVar v -> ReadVar v ReadVarSafe v -> ReadVarSafe v ReadArray ident expr -> ReadArray ident (fmap f expr) ReadArraySafe ident expr -> ReadArraySafe ident (fmap f expr) ARGVElements -> ARGVElements ARGVLength -> ARGVLength Elements ident -> Elements ident ElementsSafe ident -> Elements ident Keys ident -> Keys ident Length ident -> Length ident Trim trim v expr -> Trim trim v (fmap f expr) ArrayLength ident -> ArrayLength ident Concat expr expr' -> Concat (fmap f expr) (fmap f expr') Eval ann -> Eval (fmap f ann) EvalUnquoted ann -> EvalUnquoted (fmap f ann) ProcessIn ann -> ProcessIn (fmap f ann) ProcessOut ann -> ProcessOut (fmap f ann) instance Foldable Expression where foldMap f expr = case expr of Literal _ -> mempty Asterisk -> mempty QuestionMark -> mempty Tilde -> mempty ReadVar _ -> mempty ReadVarSafe _ -> mempty ReadArray _ expr -> foldMap f expr ReadArraySafe _ expr -> foldMap f expr ARGVElements -> mempty ARGVLength -> mempty Elements _ -> mempty ElementsSafe _ -> mempty Keys _ -> mempty Length _ -> mempty Trim _ _ expr -> foldMap f expr ArrayLength _ -> mempty Concat expr expr' -> foldMap f expr `mappend` foldMap f expr' Eval ann -> foldMap f ann EvalUnquoted ann -> foldMap f ann ProcessIn ann -> foldMap f ann ProcessOut ann -> foldMap f ann {-| Escape a 'ByteString' to produce a literal expression. -} literal :: ByteString -> Expression t literal = Literal . Esc.bash data VarName = VarIdent Identifier | VarSpecial SpecialVar deriving instance Eq VarName deriving instance Ord VarName deriving instance Show VarName instance IsString VarName where fromString = fromJust . varName . fromString varName :: ByteString -> Maybe VarName varName bytes = (VarSpecial <$> specialVar bytes) `mplus` (VarIdent <$> identifier bytes) {-| The type of legal Bash identifiers, strings beginning with letters or @_@ and containing letters, @_@ and digits. -} newtype Identifier = Identifier ByteString deriving instance Eq Identifier deriving instance Ord Identifier deriving instance Show Identifier instance IsString Identifier where fromString = fromJust . identifier . fromString {-| Produce an 'Identifier' from a 'ByteString' of legal format. -} identifier :: ByteString -> Maybe Identifier identifier bytes = do (c, bytes') <- uncons bytes if okayHead c && all okayTail bytes' then Just (Identifier bytes) else Nothing where okayTail c = (isAlphaNum c || c == '_') && isAscii c okayHead c = (isAlpha c || c == '_') && isAscii c {-| Bash functions can have surprising names. Once the word containing the name of the function has been identified by the Bash parser, the only constraint as of this writing is that it not be all digits and contain neither quotes nor dollar signs. Thus the following are all callable functions: > function http://duckduckgo.com { curl -sSfL http://duckduckgo.com?q="$1" ;} > function 123.0 { echo 123.0 ;} > function + { echo "$@" | sed 's/ / + /g' | bc ;} Yet a function name may only be parsed if its surroundings constitute a valid function declaration. So we are not able to declare these functions: > function par()ens { echo '(' "$@" ')' ;} > function (parens) { echo '(' "$@" ')' ;} (The parser thinks the parens are there to separate the function name from the function body.) Some functions can be declared but not called. For example: > function for { echo for ;} > function x=y { echo x is y ;} Calling the former results in a syntax error. A call to the latter is parsed as an assignment. It is possible to override important builtins with function declarations. For example: > function set { echo Haha! ;} > function declare { echo Surprise! ;} Overall, Bash function names are quite flexible but inconsistent and potentially a cause of grave errors. -} data FuncName = Simple Identifier | Fancy ByteString deriving instance Eq FuncName deriving instance Ord FuncName deriving instance Show FuncName instance IsString FuncName where fromString = fromJust . funcName . fromString {-| Produce a 'FuncName', choosing the 'Simple' constructor if the name is a simple identifier. -} funcName :: ByteString -> Maybe FuncName funcName bytes = (Simple <$> identifier bytes) <|> Fancy bytes <$ guard (not invalid) where invalid = all isDigit bytes || any (`elem` "()$'\"") bytes {-| The names of special variables, with otherwise illegal identifiers, are represented by this type. -} data SpecialVar = DollarQuestion | DollarHyphen | DollarDollar | DollarBang | DollarUnderscore | Dollar0 | Dollar1 | Dollar2 | Dollar3 | Dollar4 | Dollar5 | Dollar6 | Dollar7 | Dollar8 | Dollar9 deriving instance Eq SpecialVar deriving instance Ord SpecialVar deriving instance Show SpecialVar instance IsString SpecialVar where fromString = fromJust . specialVar . fromString {-| Try to render a 'SpecialVar' from a 'ByteString'. -} specialVar :: ByteString -> Maybe SpecialVar specialVar b | "$?" == b = Just DollarQuestion | "$-" == b = Just DollarHyphen | "$$" == b = Just DollarDollar | "$!" == b = Just DollarBang | "$_" == b = Just DollarUnderscore | "$0" == b = Just Dollar0 | "$1" == b = Just Dollar1 | "$2" == b = Just Dollar2 | "$3" == b = Just Dollar3 | "$4" == b = Just Dollar4 | "$5" == b = Just Dollar5 | "$6" == b = Just Dollar6 | "$7" == b = Just Dollar7 | "$8" == b = Just Dollar8 | "$9" == b = Just Dollar9 | otherwise = Nothing specialVarBytes :: SpecialVar -> ByteString specialVarBytes DollarQuestion = "$?" specialVarBytes DollarHyphen = "$-" specialVarBytes DollarDollar = "$$" specialVarBytes DollarBang = "$!" specialVarBytes DollarUnderscore = "$_" specialVarBytes Dollar0 = "$0" specialVarBytes Dollar1 = "$1" specialVarBytes Dollar2 = "$2" specialVarBytes Dollar3 = "$3" specialVarBytes Dollar4 = "$4" specialVarBytes Dollar5 = "$5" specialVarBytes Dollar6 = "$6" specialVarBytes Dollar7 = "$7" specialVarBytes Dollar8 = "$8" specialVarBytes Dollar9 = "$9" data Trim = ShortestLeading | LongestLeading | ShortestTrailing | LongestTrailing deriving instance Eq Trim deriving instance Ord Trim deriving instance Show Trim {-| A file descriptor in Bash is simply a number between 0 and 255. -} newtype FileDescriptor = FileDescriptor Word8 deriving instance Eq FileDescriptor deriving instance Ord FileDescriptor deriving instance Num FileDescriptor deriving instance Show FileDescriptor {-| Redirection \"directions\". -} data Redirection = In -- ^ Input redirection, @<@. | Out -- ^ Output redirection, @>@. | Append -- ^ Appending output, @>>@. deriving instance Eq Redirection deriving instance Ord Redirection deriving instance Show Redirection {-| Unused at present. -} data ConditionalExpression t = File_a (Expression t) | File_b (Expression t) | File_c (Expression t) | File_d (Expression t) | File_e (Expression t) | File_f (Expression t) | File_g (Expression t) | File_h (Expression t) | File_k (Expression t) | File_p (Expression t) | File_r (Expression t) | File_s (Expression t) | File_t (Expression t) | File_u (Expression t) | File_w (Expression t) | File_x (Expression t) | File_O (Expression t) | File_G (Expression t) | File_L (Expression t) | File_S (Expression t) | File_N (Expression t) | File_nt (Expression t) (Expression t) | File_ot (Expression t) (Expression t) | File_ef (Expression t) (Expression t) | OptSet (Expression t) | StringEmpty (Expression t) | StringNonempty (Expression t) | StringEq (Expression t) (Expression t) | StringNotEq (Expression t) (Expression t) | StringLT (Expression t) (Expression t) | StringGT (Expression t) (Expression t) | StringRE (Expression t) (Expression t) | NumEq (Expression t) (Expression t) | NumNotEq (Expression t) (Expression t) | NumLT (Expression t) (Expression t) | NumLEq (Expression t) (Expression t) | NumGT (Expression t) (Expression t) | NumGEq (Expression t) (Expression t) | Not (Expression t) (Expression t) | And (Expression t) (Expression t) | Or (Expression t) (Expression t) deriving instance (Eq t) => Eq (ConditionalExpression t) deriving instance (Ord t) => Ord (ConditionalExpression t) deriving instance (Show t) => Show (ConditionalExpression t) data Assignment t = Var Identifier (Expression t) | Array Identifier [Expression t] | Dict Identifier [(Expression t, Expression t)] deriving instance (Eq t) => Eq (Assignment t) deriving instance (Ord t) => Ord (Assignment t) deriving instance (Show t) => Show (Assignment t) instance Functor Assignment where fmap f assign = case assign of Var ident expr -> Var ident (f' expr) Array ident assigns -> Array ident (fmap f' assigns) Dict ident assigns -> Dict ident (fmap (f' *** f') assigns) where f' = fmap f instance Foldable Assignment where foldMap f assign = case assign of Var _ expr -> f' expr Array _ assigns -> foldMap f' assigns Dict _ assigns -> foldMap foldMapPair assigns where f' = foldMap f foldMapPair (x, y) = f' x `mappend` f' y