{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-} -- |Here we use the stuff defined in the AST and Parsec modules -- to parse things. module Language.Sh.Parser ( parse, hereDocsComplete ) where import Language.Sh.Parser.Internal import Language.Sh.Parser.Parsec import Language.Sh.Syntax import Language.Sh.Map import Language.Sh.Compat ( (<=<) ) import Text.ParserCombinators.Parsec.Error ( ParseError ) import Text.ParserCombinators.Parsec ( choice, manyTill, eof, many1, skipMany, optional, (<|>), (), many, try, count, sepBy1, notFollowedBy, lookAhead, getInput, setInput, runParser ) import Control.Monad ( unless, when, liftM2, ap, guard ) import Data.List ( (\\) ) import Data.Char ( isDigit ) import Data.Maybe ( isJust, catMaybes ) import Debug.Trace ( trace ) -- We don't actually really need Parsec3 - could adapt that Parsec2 source... -- Also, this should maybe be a debug switch...? -- ifdef HAVE_PARSEC3 -- include "Language/Sh/Parser/safemany.h" -- endif data WordContext = NormalContext | ParameterContext | HereEndContext deriving ( Enum, Ord, Eq ) delimiters :: WordContext -> String delimiters NormalContext = "&|;<>()# \t\r\n" delimiters ParameterContext = "}" -- don't delimit spaces yet delimiters HereEndContext = delimiters NormalContext -- "&|;<>()# \t\r\n" lookaheadNormalDelimiter :: P () lookaheadNormalDelimiter = lookAhead $ oneOf (delimiters NormalContext) >> return () cnewline :: P () cnewline = do newline <|> (do char '#' skipMany (noneOf "\n\r") newline <|> eof) "" spaces eatNewlines :: P a -> P a eatNewlines a = do a' <- a newlines return a' -- Need to work on error parsing for the function def... -- at least, in the case of heredocs inside (illegal?) it seems to fail statement :: P Statement statement = do aliasOn choice [try $ do name <- basicName spaces >> char '(' >> spaces char ')' <|> unexpectedNoEOF spaces >> newlines -- optional FunctionDefinition name `fmap` compoundStatement `ap` many redirection ,Compound `fmap` compoundStatement `ap` many redirection ,do s <- statementNoSS case s of -- needed to prevent errors w/ 'many' OrderedStatement [] -> fail "empty statement" s -> return s ] -- Once we know we don't have a subshell... -- We could probably wrap these into one function, since there's a fair -- amount of repitition here... statementNoSS :: P Statement statementNoSS = do aliasOn choice [expandAlias >> statementNoSS ,try $ do a <- assignment fmap (addAssignment a) statementNoSS ,try $ do r <- redirection fmap (addRedirection r) statementNoSS ,simpleStatement] simpleStatement :: P Statement simpleStatement = choice [expandAlias >> simpleStatement ,try $ do r <- redirection fmap (addRedirection r) simpleStatement ,try $ do w <- word NormalContext fmap (addWord w) simpleStatement ,return $ OrderedStatement []] expandAlias :: P () -- lookAhead expandAlias = try $ do (aok,as,ip) <- getAliasInfo unless aok $ fail "" a <- many $ noneOf "\\\"'()|&;<> \t\r\n" -- correct set? case lookup a as of Nothing -> fail "" Just s -> injectAlias a s as ip -- |We do some weird (scary) stuff here... in particular, we inject -- the control codes /after/ the first character in the stream, which -- must have been a delimiter of some sort. This is so that /all/ the -- sub-expansions that occur here will stack properly and /not/ consume -- the @Aliases@ token prematurely, thus permanently losing the outermost -- alias. I.e. -- $ alias foo="echo "; alias bar=foo -- $ foo bar bar -- If we just inject before @i@ then we end up with "echo echo bar", because -- the bar expands to "foo\CTRL ..." and then the control codes get eaten -- up when expanding foo, and that's bad. injectAlias :: String -> String -> [(String,String)] -> Bool -> P () injectAlias a s as ip = do i <- getInput let (h,t) = splitAt 1 i aOn = if isBlank $ last s then (Ctl (AliasOn True):) else id -- don't turn /off/ setInput $ map Chr s ++ Ctl (IncPos ip):h ++ Ctl (Aliases as): -- These next two may be gratuitous aOn t setAliasInfo (True,as\\[(a,s)],False) unless True $ do l <- getInput setInput l -- $ trace ("input: "++show l) l spaces pipeline :: P Pipeline pipeline = (try $ do reservedWord "!" fmap BangPipeline $ statement `sepBy1` pipe ) <|> (fmap Pipeline $ statement `sepBy1` pipe) pipe :: P () pipe = try $ do char '|' notFollowedBy $ fmap Chr $ char '|' spaces andorlist :: P AndOrList andorlist = assocL pipeline (try $ (operator "||" >> return (:||:)) <|> (operator "&&" >> return (:&&:))) Singleton reservedWord :: String -> P String reservedWord s = try $ do s' <- string s show s lookaheadNormalDelimiter <|> eof spaces return s' reservedWord_ :: String -> P () reservedWord_ s = reservedWord s >> return () isOperator :: String -> Bool isOperator x = x `elem` [">",">>",">|","<","<>","<<","<<-",">&","<&", "|","||","&","&&",";",";;","(",")"] operator :: String -> P String operator s = try $ do string s eof <|> (do c <- lookAhead anyChar guard $ not $ isOperator $ s++[c]) spaces return s operator_ :: String -> P () operator_ s = operator s >> return () inClause :: P [Word] inClause = choice [try $ do optional sequentialSep reservedWord "do" return defaultIn ,do newlines reservedWord "in" <|> unexpected ws <- many (word NormalContext) sequentialSep <|> unexpected reservedWord "do" <|> unexpected return ws] where defaultIn = [[Quoted $ Expand $ SimpleExpansion "@"]] cases :: P [([Word],[Command])] cases = manyTill line $ reservedWord "esac" where line = do ip <- insideParens if ip then operator_ "(" <|> unexpected else optional $ operator_ "(" pats <- word NormalContext `sepBy1` operator "|" operator ")" <|> unexpectedNoEOF (cmds,_) <- commandsTill dsemi return (pats,cmds) dsemi :: P String dsemi = operator ";;" <|> lookAhead (reservedWord "esac") "`;;' or `esac'" -- |Parse any of the compound statements: @if@, @for@, subshells, -- brace groups, ... compoundStatement :: P CompoundStatement compoundStatement = choice [do reservedWord "for" name <- basicName <|> unexpectedNoEOF vallist <- inClause (cs,_) <- commandsTill (reservedWord "done") return $ For name vallist cs ,do reservedWord "while" (cond,_) <- commandsTill $ reservedWord "do" (code,_) <- commandsTill $ reservedWord "done" return $ While cond code ,do reservedWord "until" (cond,_) <- commandsTill $ reservedWord "do" (code,_) <- commandsTill $ reservedWord "done" return $ Until cond code ,do reservedWord "if" parseIf -- recursive b/c of elif ,do reservedWord "case" expr <- word NormalContext <|> unexpectedNoEOF newlines reservedWord "in" <|> unexpected newlines what <- cases return $ Case expr what ,do operator "(" openParen cs <- many command operator ")" <|> unexpected closeParen return $ Subshell cs ,do reservedWord "{" (cs,_) <- commandsTill $ reservedWord "}" return $ BraceGroup cs ] parseIf :: P CompoundStatement parseIf = do (cond,_) <- commandsTill $ reservedWord "then" (thn,next) <- commandsTill $ choice [reservedWord "elif" ,reservedWord "else" ,reservedWord "fi"] case next of "else" -> do (els,_) <- commandsTill $ reservedWord "fi" return $ If cond thn els "elif" -> do elif <- parseIf return $ If cond thn $ compound elif "fi" -> return $ If cond thn [] where compound x = [Synchronous $ Singleton $ Pipeline [Compound x []]] -- |Here is where we need to be careful about parens, at least once we -- get to the case statements...? -- |Also, we can use 'commandTerminator' to substitute heredocs safely because -- @<<@ are not allowed in non-command arguments to control structures anyway. -- Note that this code is duplicated in the code for @case@ statements! command :: P Command command = do c <- andorlist "list" t <- commandTerminator "terminator" return $ if t then Asynchronous c else Synchronous c unlessM :: Monad m => m Bool -> m () -> m () unlessM cond job = cond >>= (unless `flip` job) readHDs :: P () readHDs = do hd <- nextHereDoc case hd of Just s -> readHD s >> readHDs Nothing -> return () sequentialSep :: P () sequentialSep = choice [operator ";" >> return () ,cnewline >> readHDs ,eof >> closeHereDocs -- ? ,do unlessM insideParens $ fail "" lookAhead $ operator ")" return () ] >> newlines commandTerminator :: P Bool commandTerminator = (operator "&" >> newlines >> return True) <|> (sequentialSep >> return False) <|> (lookAhead (operator_ ";;") >> return False) "terminator" manyTill' :: P a -> P end -> P ([a],end) manyTill' p end = scan where scan = do e <- end return ([],e) <|> do x <- p (xs,e) <- scan return ((x:xs),e) -- |Given a delimiter, parses a heredoc and moves the delimiter off the -- delimiter list and instead replaces the replacement text onto the -- 'readHereDocs' list. Note that we want to end with a newline, but it's -- being read by the "till" parser. Instead, we use a 'wPutStrLn' in the -- 'Shell' module, rather than attempting to add the newline back in here. readHD :: String -> P () readHD delim = popHereDoc =<< manyTill' (dqLex "\\$`") (choice [try $ do newline string delim newline <|> eof return True ,eof >> return False]) dqLex :: String -> P Lexeme -- input: chars to escape with '\\' dqLex escape = choice [do char '\\' choice [newline >> dqLex escape ,ql `fmap` oneOf escape ,return $ ql '\\' ] ,Quoted `fmap` expansion ,ql `fmap` anyChar ] -- |Nothing left after command terminator, so turn all the heredocs into -- empty @False@s. closeHereDocs :: P () closeHereDocs = do hd <- nextHereDoc case hd of Nothing -> return () Just _ -> popHereDoc ([],False) >> closeHereDocs -- |How can the many cnewline possibly fail...? If spaces end in something -- else... So we should move over to gobbling spaces after words, rather -- than before... newlines :: P () newlines = (try (skipMany cnewline) <|> return ()) >> spaces -- |Parse a single word. We need to take a @String@ input so that -- we can conditionally end on certain delimiters, e.g. @}@. -- Note that #()|&<>; are in fact all allowed inside ${A:- }, so -- we'll need to take them all as inputs. word :: WordContext -> P Word word context = do ip <- insideParens -- ')' below was '('; only mattered in {} let del = (if ip then (')':) else id) $ delimiters context w <- fmap concat $ word' del <:> many (word' $ del\\"#") spaces return w where word' :: String -> P Word word' s = choice [do char '\\' try (newline >> return []) <|> do c <- anyChar return [Quote '\\',ql c] ,do char '"' w <- dqWord char '"' return $ Quote '"':w++[Quote '"'] ,do char '\'' w <- many $ noneOf "\'" char '\'' return $ Quote '\'':map ql w++[Quote '\''] ,do when (context==HereEndContext) $ fail "" one expansion ,do c <- noneOf s return [Literal c] ] "word" dqWord :: P Word dqWord = fmap concat $ many $ choice [do char '\\' choice [newline >> return [] ,map ql `fmap` one (oneOf "\\$`\"") ,return $ [ql '\\'] ] ,map Quoted `fmap` one expansion ,map ql `fmap` one (noneOf "\"") ] -- This needs to reject, e.g. "a " but for some reason "${a }" doesn't fail isName :: String -> Bool isName s = case parse' [] (try (only name) <|> only (many1 digit)) s of Right _ -> True Left _ -> False -- dqWord :: P Word -- dqWord = manyTill (dqLex "\\$`\"") (char '"') -- dqLex :: String -> P Lexeme -- input: chars to escape with \ -- dqLex escape = choice [do char '\\' -- choice [newline >> dqLex escape -- ,ql `fmap` oneOf escape -- ,return $ ql '\\' -- ] -- ,Quoted `fmap` expansion -- ,ql `fmap` anyChar -- ] -- Technically, we're not saving the \ quotes here....... does this matter? expansion :: P Lexeme expansion = choice [do char '$' choice [try $ do n <- name return $ Expand $ SimpleExpansion n ,do char '{' choice [do char '#' n <- many $ noneOf "}" char '}' if isName n then return $ Expand $ LengthExpansion n else fatal $ "${#"++n++"}: bad substitution" ,try $ do n <- name -- many $ noneOf ":-=?+#%" -- check isName again...? (c,op) <- modifier rest <- word ParameterContext char '}' <|> (char ')' >> unexEOF) return $ Expand $ ModifiedExpansion n op c rest ,do ip <- insideParens let p = if ip then (')':) else id n <- many $ noneOf $ p "}" char '}' <|> (char ')' >> unexEOF) if isName n then return $ Expand $ SimpleExpansion n else fatal $ "${"++n++ "}: bad substitution"] ,do char '(' openParen l <- choice [do char '(' a <- arithmetic -- use parenDepth? return $ Expand a ,do c <- commands char ')' return $ Expand $ CommandSub c] closeParen return l ,return $ Literal '$' ] ,do char '`' s <- fmap catMaybes $ many $ escape "`$\\" <|> Just `fmap` noneOf "`" char '`' (_,as,_) <- getAliasInfo -- cf. bash: alias foo='`foo`' case parse' as (only commands) s of Left err -> fatal $ "command substitution: " ++ show (unFatal err) Right cs -> return $ Expand $ CommandSub cs ] where unexEOF = fatal "unexpected EOF while looking for matching `}'" modifier = choice [do c <- zeroOne $ char ':' op <- oneOf "-=?+" return (not $ null c,op) ,do op <- oneOf "#%" c <- zeroOne $ char op return (not $ null c,op)] arithmetic :: P Expansion arithmetic = do w <- arithWord =<< getParenDepth return $ Arithmetic $ ql '(':w arithWord :: Int -> P Word arithWord d0 = aw -- now we can forget about d0 where aw = do d <- getParenDepth if d==d0-1 then (char ')' >> openParen >> return []) <|> (openParen >> aw') else aw' aw' :: P Word aw' = choice [do char '\\' choice [newline >> aw' ,liftM2 (:) (ql `fmap` oneOf "\\$`\"") aw' ,liftM2 (\c r -> ql '\\':ql c:r) anyChar aw'] ,do { ex <- expansion; fmap (Quoted ex:) aw' } ,char '(' >> openParen >> (ql '(':) `fmap` aw' ,char ')' >> closeParen >> (ql ')':) `fmap` aw -- no ' ,liftM2 (:) (ql `fmap` anyChar) aw' ] "arithmetic word" {- TEST: $ alias bar=foo $ alias foo='echo $(bar)' $ foo dash: foo: not found -} escape :: String -> P (Maybe Char) escape s = char '\\' >> choice [newline >> return Nothing ,Just `fmap` oneOf s ,return $ Just '\\'] name :: P String name = count 1 (oneOf "@*#?-$!" <|> digit) <|> alphaUnder <:> many alphaUnderNum "name" basicName :: P String basicName = token (alphaUnder <:> many alphaUnderNum) "name" assignment :: P Assignment assignment = do var <- basicName "name" char '=' val <- fmap concat $ zeroOne $ word NormalContext return $ var := val "assignment" redirection :: P Redir redirection = try (do spaces d <- many digit o <- redirOperator spaces let fd = if null d then Nothing else Just $ read d if o `elem` ["<<","<<-"] then do t <- hereEnd mkHereDoc o fd t else do t <- word NormalContext mkRedir o fd t) "redirection" -- |Parse the heredoc delimiter. Technically this is supposed to be a -- word, but we don't make certain distinctions that @sh@ does (i.e. @$a@ -- vs @${a}@), so I think we're better off just using a string... hereEnd :: P String hereEnd = token $ fromLit `fmap` word HereEndContext where fromLit [] = "" fromLit (Quote _:xs) = fromLit xs fromLit (Quoted q:xs) = fromLit $ q:xs fromLit (Literal l:xs) = l:fromLit xs commands :: P [Command] commands = do newlines many command commandsTill :: P String -> P ([Command],String) commandsTill delim = do rest <- getInput (c,e) <- manyTill' (eatNewlines command) delim c' <- expandHereDocs c -- may not be exhaustive...? return (c',e) -- The order is wrong here, since we could put the Redir's either before or -- after the Word's... We'll need to figure something out to deal with that. -- Easiest would be to just number them or something, and go through one at -- a time at the end to "de-number" them. unorderStatements :: [Command] -> [Command] unorderStatements = mapCommands f where f :: Statement -> Statement f (OrderedStatement ts) = let (ws,rs,as) = f' ts in Statement ws rs as f s = s f' [] = ([],[],[]) f' (TWord w:ts) = let (ws,rs,as) = f' ts in (w:ws,rs,as) f' (TRedir r:ts) = let (ws,rs,as) = f' ts in (ws,r:rs,as) f' (TAssignment a:ts) = let (ws,rs,as) = f' ts in (ws,rs,a:as) expandHereDocs :: [Command] -> P [Command] expandHereDocs c = unorderStatements `fmap` mapCommandsM f c where f (i :<< s) = mk i id f (i :<<- s) = mk i stripTabs f r = return r stripTabs [] = [] stripTabs (Literal n:Literal '\t':rest) | n `elem` "\n\r" = stripTabs (Literal n:rest) stripTabs (x:xs) = x:stripTabs xs mk i f = do mwb <- nextHDReplacement case mwb of -- do we need the Nothing case? (impossible?) Just (w,b) -> return $ Heredoc i b (f w) Nothing -> return $ Heredoc i False [] -- here's a smart use of the Monad class...! :-) hereDocsComplete :: [Command] -> Bool hereDocsComplete = isJust . mapCommandsM complete where complete r = case r of (_:<<_) -> Nothing (_:<<-_) -> Nothing Heredoc _ False _ -> Nothing r -> Just r -- |Ensures there's an 'eof' after whatever we parse. only :: P a -> P a only p = p >>= (\a -> eof >> return a) -- |We need to run the parser occasionally from within, so we provide -- a simpler interface that does all the mapping, etc, for us. parse' :: [(String,String)] -> P a -> String -> Either ParseError a parse' as p s = runParser p (startState as) "" (map Chr s) -- |This is the main export here. We take a list of aliases for the -- environment and a @String@ to parse. The return type is @Right -- [Command]@ if parsing succeeded and @Left (String,Bool)@ upon -- failure. The @Bool@ is @True@ when the error was fatal/unrecoverable. parse :: [(String,String)] -- ^list of alises to expand -> String -- ^input string -> Either (String,Bool) [Command] parse as s = case parse' as (only commands >>= expandHereDocs) s of Left err -> case getFatal err of Just f -> Left (f,True) Nothing -> Left (show err,False) Right cs -> Right cs