{- Copyright 2012-2015 Vidar Holen This file is part of ShellCheck. http://www.vidarholen.net/contents/shellcheck ShellCheck is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. ShellCheck is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . -} module ShellCheck.ASTLib where import ShellCheck.AST import Control.Monad.Writer import Control.Monad import Data.Functor import Data.List import Data.Maybe -- Is this a type of loop? isLoop t = case t of T_WhileExpression {} -> True T_UntilExpression {} -> True T_ForIn {} -> True T_ForArithmetic {} -> True T_SelectIn {} -> True _ -> False -- Will this split into multiple words when used as an argument? willSplit x = case x of T_DollarBraced {} -> True T_DollarExpansion {} -> True T_Backticked {} -> True T_BraceExpansion {} -> True T_Glob {} -> True T_Extglob {} -> True T_NormalWord _ l -> any willSplit l _ -> False isGlob T_Extglob {} = True isGlob T_Glob {} = True isGlob (T_NormalWord _ l) = any isGlob l isGlob _ = False -- Is this shell word a constant? isConstant token = case token of -- This ignores some cases like ~"foo": T_NormalWord _ (T_Literal _ ('~':_) : _) -> False T_NormalWord _ l -> all isConstant l T_DoubleQuoted _ l -> all isConstant l T_SingleQuoted _ _ -> True T_Literal _ _ -> True _ -> False -- Is this an empty literal? isEmpty token = case token of T_NormalWord _ l -> all isEmpty l T_DoubleQuoted _ l -> all isEmpty l T_SingleQuoted _ "" -> True T_Literal _ "" -> True _ -> False -- Quick&lazy oversimplification of commands, throwing away details -- and returning a list like ["find", ".", "-name", "${VAR}*" ]. oversimplify token = case token of (T_NormalWord _ l) -> [concat (concatMap oversimplify l)] (T_DoubleQuoted _ l) -> [concat (concatMap oversimplify l)] (T_SingleQuoted _ s) -> [s] (T_DollarBraced _ _) -> ["${VAR}"] (T_DollarArithmetic _ _) -> ["${VAR}"] (T_DollarExpansion _ _) -> ["${VAR}"] (T_Backticked _ _) -> ["${VAR}"] (T_Glob _ s) -> [s] (T_Pipeline _ _ [x]) -> oversimplify x (T_Literal _ x) -> [x] (T_ParamSubSpecialChar _ x) -> [x] (T_SimpleCommand _ vars words) -> concatMap oversimplify words (T_Redirecting _ _ foo) -> oversimplify foo (T_DollarSingleQuoted _ s) -> [s] (T_Annotation _ _ s) -> oversimplify s -- Workaround for let "foo = bar" parsing (TA_Sequence _ [TA_Expansion _ v]) -> concatMap oversimplify v _ -> [] -- Turn a SimpleCommand foo -avz --bar=baz into args "a", "v", "z", "bar", -- each in a tuple of (token, stringFlag). Non-flag arguments are added with -- stringFlag == "". getFlagsUntil stopCondition (T_SimpleCommand _ _ (_:args)) = let tokenAndText = map (\x -> (x, concat $ oversimplify x)) args (flagArgs, rest) = break (stopCondition . snd) tokenAndText in concatMap flag flagArgs ++ map (\(t, _) -> (t, "")) rest where flag (x, '-':'-':arg) = [ (x, takeWhile (/= '=') arg) ] flag (x, '-':args) = map (\v -> (x, [v])) args flag (x, _) = [ (x, "") ] getFlagsUntil _ _ = error "Internal shellcheck error, please report! (getFlags on non-command)" -- Get all flags in a GNU way, up until -- getAllFlags = getFlagsUntil (== "--") -- Get all flags in a BSD way, up until first non-flag argument or -- getLeadingFlags = getFlagsUntil (\x -> x == "--" || (not $ "-" `isPrefixOf` x)) -- Check if a command has a flag. hasFlag cmd str = str `elem` (map snd $ getAllFlags cmd) -- Is this token a word that starts with a dash? isFlag token = case getWordParts token of T_Literal _ ('-':_) : _ -> True _ -> False -- Is this token a flag where the - is unquoted? isUnquotedFlag token = fromMaybe False $ do str <- getLeadingUnquotedString token return $ "-" `isPrefixOf` str -- Given a T_DollarBraced, return a simplified version of the string contents. bracedString (T_DollarBraced _ l) = concat $ oversimplify l bracedString _ = error "Internal shellcheck error, please report! (bracedString on non-variable)" -- Is this an expansion of multiple items of an array? isArrayExpansion t@(T_DollarBraced _ _) = let string = bracedString t in "@" `isPrefixOf` string || not ("#" `isPrefixOf` string) && "[@]" `isInfixOf` string isArrayExpansion _ = False -- Is it possible that this arg becomes multiple args? mayBecomeMultipleArgs t = willBecomeMultipleArgs t || f t where f t@(T_DollarBraced _ _) = let string = bracedString t in "!" `isPrefixOf` string f (T_DoubleQuoted _ parts) = any f parts f (T_NormalWord _ parts) = any f parts f _ = False -- Is it certain that this word will becomes multiple words? willBecomeMultipleArgs t = willConcatInAssignment t || f t where f T_Extglob {} = True f T_Glob {} = True f T_BraceExpansion {} = True f (T_DoubleQuoted _ parts) = any f parts f (T_NormalWord _ parts) = any f parts f _ = False -- This does token cause implicit concatenation in assignments? willConcatInAssignment token = case token of t@T_DollarBraced {} -> isArrayExpansion t (T_DoubleQuoted _ parts) -> any willConcatInAssignment parts (T_NormalWord _ parts) -> any willConcatInAssignment parts _ -> False -- Maybe get the literal string corresponding to this token getLiteralString :: Token -> Maybe String getLiteralString = getLiteralStringExt (const Nothing) -- Definitely get a literal string, skipping over all non-literals onlyLiteralString :: Token -> String onlyLiteralString = fromJust . getLiteralStringExt (const $ return "") -- Maybe get a literal string, but only if it's an unquoted argument. getUnquotedLiteral (T_NormalWord _ list) = concat <$> mapM str list where str (T_Literal _ s) = return s str _ = Nothing getUnquotedLiteral _ = Nothing -- Get the last unquoted T_Literal in a word like "${var}foo"THIS -- or nothing if the word does not end in an unquoted literal. getTrailingUnquotedLiteral :: Token -> Maybe Token getTrailingUnquotedLiteral t = case t of (T_NormalWord _ list@(_:_)) -> from (last list) _ -> Nothing where from t = case t of T_Literal {} -> return t _ -> Nothing -- Get the leading, unquoted, literal string of a token (if any). getLeadingUnquotedString :: Token -> Maybe String getLeadingUnquotedString t = case t of T_NormalWord _ ((T_Literal _ s) : _) -> return s _ -> Nothing -- Maybe get the literal string of this token and any globs in it. getGlobOrLiteralString = getLiteralStringExt f where f (T_Glob _ str) = return str f _ = Nothing -- Maybe get the literal value of a token, using a custom function -- to map unrecognized Tokens into strings. getLiteralStringExt :: (Token -> Maybe String) -> Token -> Maybe String getLiteralStringExt more = g where allInList = fmap concat . mapM g g (T_DoubleQuoted _ l) = allInList l g (T_DollarDoubleQuoted _ l) = allInList l g (T_NormalWord _ l) = allInList l g (TA_Expansion _ l) = allInList l g (T_SingleQuoted _ s) = return s g (T_Literal _ s) = return s g (T_ParamSubSpecialChar _ s) = return s g x = more x -- Is this token a string literal? isLiteral t = isJust $ getLiteralString t -- Turn a NormalWord like foo="bar $baz" into a series of constituent elements like [foo=,bar ,$baz] getWordParts (T_NormalWord _ l) = concatMap getWordParts l getWordParts (T_DoubleQuoted _ l) = l -- TA_Expansion is basically T_NormalWord for arithmetic expressions getWordParts (TA_Expansion _ l) = concatMap getWordParts l getWordParts other = [other] -- Return a list of NormalWords that would result from brace expansion braceExpand (T_NormalWord id list) = take 1000 $ do items <- mapM part list return $ T_NormalWord id items where part (T_BraceExpansion id items) = do item <- items braceExpand item part x = return x -- Maybe get a SimpleCommand from immediate wrappers like T_Redirections getCommand t = case t of T_Redirecting _ _ w -> getCommand w T_SimpleCommand _ _ (w:_) -> return t T_Annotation _ _ t -> getCommand t _ -> Nothing -- Maybe get the command name of a token representing a command getCommandName t = do (T_SimpleCommand _ _ (w:rest)) <- getCommand t s <- getLiteralString w if "busybox" `isSuffixOf` s then case rest of (applet:_) -> getLiteralString applet _ -> return s else return s -- If a command substitution is a single command, get its name. -- $(date +%s) = Just "date" getCommandNameFromExpansion :: Token -> Maybe String getCommandNameFromExpansion t = case t of T_DollarExpansion _ [c] -> extract c T_Backticked _ [c] -> extract c T_DollarBraceCommandExpansion _ [c] -> extract c _ -> Nothing where extract (T_Pipeline _ _ [cmd]) = getCommandName cmd extract _ = Nothing -- Get the basename of a token representing a command getCommandBasename = fmap basename . getCommandName where basename = reverse . takeWhile (/= '/') . reverse isAssignment t = case t of T_Redirecting _ _ w -> isAssignment w T_SimpleCommand _ (w:_) [] -> True T_Assignment {} -> True T_Annotation _ _ w -> isAssignment w _ -> False isOnlyRedirection t = case t of T_Pipeline _ _ [x] -> isOnlyRedirection x T_Annotation _ _ w -> isOnlyRedirection w T_Redirecting _ (_:_) c -> isOnlyRedirection c T_SimpleCommand _ [] [] -> True _ -> False isFunction t = case t of T_Function {} -> True; _ -> False isBraceExpansion t = case t of T_BraceExpansion {} -> True; _ -> False -- Get the lists of commands from tokens that contain them, such as -- the body of while loops or branches of if statements. getCommandSequences :: Token -> [[Token]] getCommandSequences t = case t of T_Script _ _ cmds -> [cmds] T_BraceGroup _ cmds -> [cmds] T_Subshell _ cmds -> [cmds] T_WhileExpression _ _ cmds -> [cmds] T_UntilExpression _ _ cmds -> [cmds] T_ForIn _ _ _ cmds -> [cmds] T_ForArithmetic _ _ _ _ cmds -> [cmds] T_IfExpression _ thens elses -> map snd thens ++ [elses] T_Annotation _ _ t -> getCommandSequences t _ -> [] -- Get a list of names of associative arrays getAssociativeArrays t = nub . execWriter $ doAnalysis f t where f :: Token -> Writer [String] () f t@T_SimpleCommand {} = fromMaybe (return ()) $ do name <- getCommandName t let assocNames = ["declare","local","typeset"] guard $ elem name assocNames let flags = getAllFlags t guard $ elem "A" $ map snd flags let args = map fst . filter ((==) "" . snd) $ flags let names = mapMaybe (getLiteralStringExt nameAssignments) args return $ tell names f _ = return () nameAssignments t = case t of T_Assignment _ _ name _ _ -> return name _ -> Nothing -- A Pseudoglob is a wildcard pattern used for checking if a match can succeed. -- For example, [[ $(cmd).jpg == [a-z] ]] will give the patterns *.jpg and ?, which -- can be proven never to match. data PseudoGlob = PGAny | PGMany | PGChar Char deriving (Eq, Show) -- Turn a word into a PG pattern, replacing all unknown/runtime values with -- PGMany. wordToPseudoGlob :: Token -> Maybe [PseudoGlob] wordToPseudoGlob word = simplifyPseudoGlob . concat <$> mapM f (getWordParts word) where f x = case x of T_Literal _ s -> return $ map PGChar s T_SingleQuoted _ s -> return $ map PGChar s T_DollarBraced {} -> return [PGMany] T_DollarExpansion {} -> return [PGMany] T_Backticked {} -> return [PGMany] T_Glob _ "?" -> return [PGAny] T_Glob _ ('[':_) -> return [PGAny] T_Glob {} -> return [PGMany] T_Extglob {} -> return [PGMany] _ -> return [PGMany] -- Turn a word into a PG pattern, but only if we can preserve -- exact semantics. wordToExactPseudoGlob :: Token -> Maybe [PseudoGlob] wordToExactPseudoGlob word = simplifyPseudoGlob . concat <$> mapM f (getWordParts word) where f x = case x of T_Literal _ s -> return $ map PGChar s T_SingleQuoted _ s -> return $ map PGChar s T_Glob _ "?" -> return [PGAny] T_Glob _ "*" -> return [PGMany] _ -> fail "Unknown token type" -- Reorder a PseudoGlob for more efficient matching, e.g. -- f?*?**g -> f??*g simplifyPseudoGlob :: [PseudoGlob] -> [PseudoGlob] simplifyPseudoGlob = f where f [] = [] f (x@(PGChar _) : rest ) = x : f rest f list = let (anys, rest) = span (\x -> x == PGMany || x == PGAny) list in order anys ++ f rest order s = let (any, many) = partition (== PGAny) s in any ++ take 1 many -- Check whether the two patterns can ever overlap. pseudoGlobsCanOverlap :: [PseudoGlob] -> [PseudoGlob] -> Bool pseudoGlobsCanOverlap = matchable where matchable x@(xf:xs) y@(yf:ys) = case (xf, yf) of (PGMany, _) -> matchable x ys || matchable xs y (_, PGMany) -> matchable x ys || matchable xs y (PGAny, _) -> matchable xs ys (_, PGAny) -> matchable xs ys (_, _) -> xf == yf && matchable xs ys matchable [] [] = True matchable (PGMany : rest) [] = matchable rest [] matchable (_:_) [] = False matchable [] r = matchable r [] -- Check whether the first pattern always overlaps the second. pseudoGlobIsSuperSetof :: [PseudoGlob] -> [PseudoGlob] -> Bool pseudoGlobIsSuperSetof = matchable where matchable x@(xf:xs) y@(yf:ys) = case (xf, yf) of (PGMany, PGMany) -> matchable x ys (PGMany, _) -> matchable x ys || matchable xs y (_, PGMany) -> False (PGAny, _) -> matchable xs ys (_, PGAny) -> False (_, _) -> xf == yf && matchable xs ys matchable [] [] = True matchable (PGMany : rest) [] = matchable rest [] matchable _ _ = False wordsCanBeEqual x y = fromMaybe True $ liftM2 pseudoGlobsCanOverlap (wordToPseudoGlob x) (wordToPseudoGlob y)