-- |Here we define the interface to 'Parsec', resulting in a -- 'GenParser' type that behaves much like a stateful 'CharParser', -- but with the added abstraction of dealing with aliases. module Language.Sh.Parser.Parsec where import Text.ParserCombinators.Parsec ( GenParser, getState, setState, tokenPrim, count, (<|>), (), skipMany, many, eof, getInput, setInput ) import Text.ParserCombinators.Parsec.Pos ( updatePosChar ) import Text.ParserCombinators.Parsec.Error ( ParseError, Message(..), errorMessages, errorPos, newErrorMessage ) import Data.Char ( isUpper, isLower, isAlpha, isAlphaNum, isDigit, isHexDigit, isOctDigit ) import Data.Monoid ( Monoid, mappend ) import Data.Maybe ( listToMaybe ) import Control.Monad ( unless, when ) import Debug.Trace ( trace ) import Language.Sh.Syntax ( Word ) -- |Generalized @Char@. data MChar = Ctl Control | Chr Char instance Show MChar where show (Ctl (AliasOn b)) = "AliasOn "++show b show (Ctl (Aliases s)) = "Aliases "++show s show (Ctl (IncPos b)) = "IncPos "++show b show (Chr c) = show c -- |We need to intersperse control codes with the @Char@s. These -- will have monadic actions, but will not affect the 'uncons'. data Control = AliasOn Bool | Aliases [(String,String)] | IncPos Bool -- ^turn on/off SourcePos counting. -- instance Show Control where show _ = "" -- |Much-reduced state to keep track of. data ParserState = PS { aliasOK :: Bool , aliases :: [(String,String)] , incPos :: Bool , parenDepth :: Int , hereDocs :: [String] , readHereDocs :: [(Word,Bool)] } type P = GenParser MChar ParserState startState :: [(String,String)] -> ParserState startState as = PS True as True 0 [] [] modify :: (ParserState -> ParserState) -> P () modify f = setState =<< fmap f getState getAliasInfo :: P (Bool, [(String,String)], Bool) getAliasInfo = fmap (\(PS a b c _ _ _) -> (a,b,c)) getState setAliasInfo :: (Bool, [(String,String)], Bool) -> P () setAliasInfo (a,b,c) = modify $ \(PS _ _ _ d h h') -> PS a b c d h h' insideParens :: P Bool insideParens = fmap (\s -> parenDepth s > 0) getState openParen :: P () openParen = modify $ \s -> s { parenDepth = parenDepth s+1 } closeParen :: P () closeParen = modify $ \s -> s { parenDepth = parenDepth s-1 } getParenDepth :: P Int getParenDepth = fmap parenDepth getState addHereDoc :: String -> P () addHereDoc d = modify $ \s -> s { hereDocs = hereDocs s ++ [d] } nextHereDoc :: P (Maybe String) nextHereDoc = fmap (listToMaybe . hereDocs) getState popHereDoc :: (Word,Bool) -> P () popHereDoc (w,b) = modify $ \s -> s { hereDocs = drop 1 $ hereDocs s , readHereDocs = readHereDocs s ++ [(w,b)] } nextHDReplacement :: P (Maybe (Word,Bool)) nextHDReplacement = do rhd <- readHereDocs `fmap` getState case rhd of (next:rest) -> do modify $ \s -> s { readHereDocs = rest } return $ Just next [] -> return Nothing fatal :: String -> P a fatal = fail . ('!':) getFatal :: ParseError -> Maybe String getFatal e = listToMaybe $ filter (not . null) $ map isFatal $ errorMessages e where isFatal (Message ('!':s)) = s isFatal _ = "" unFatal :: ParseError -> ParseError unFatal e = case getFatal e of Just s -> newErrorMessage (Message s) (errorPos e) Nothing -> e -- fatal :: String -> P a -- fatal err = do modify $ \s -> s { fatalError = True } -- fail err -- isFatal :: P Bool -- isFatal = fmap fatalError getState -- |This is a useful combinator. infixl 3 <++>, <:> (<++>) :: Monoid w => GenParser i s w -> GenParser i s w -> GenParser i s w a <++> b = do w <- a w' <- b return $ w `mappend` w' (<:>) :: GenParser i s a -> GenParser i s [a] -> GenParser i s [a] a <:> b = do w <- a w' <- b return $ w:w' tr :: Show a => String -> P a -> P a tr s p = do a <- p return $ trace (s++": "++show a) a --catMany :: Show a => P [a] -> P [a] --catMany = fmap concat . many . tr "catMany" -- * Here we re-implement much of Text.Parsec.Char oneOf :: [Char] -> P Char oneOf cs = satisfy' ("oneOf: "++show cs) (\c -> elem c cs) noneOf :: [Char] -> P Char noneOf cs = satisfy' ("noneOf: "++show cs) (\c -> not (elem c cs)) spaces :: P () spaces = skipMany space "white space" space :: P Char space = satisfy' ("space") isBlank "space" space_ :: P () space_ = space >> return () isBlank :: Char -> Bool isBlank = (`elem` " \t") one :: P a -> P [a] one = sequence . replicate 1 zeroOne :: P a -> P [a] zeroOne p = one p <|> return [] newline :: P () -- how does this affect SourcePos? newline = (count 1 (char '\n') >> zeroOne (char '\r') >> return ()) <|> (count 1 (char '\r') >> zeroOne (char '\n') >> return ()) "newline" tab :: P Char tab = char '\t' "tab" upper :: P Char upper = satisfy isUpper "uppercase letter" lower :: P Char lower = satisfy isLower "lowercase letter" alphaNum :: P Char alphaNum = satisfy' "alphaNum" isAlphaNum "letter or digit" alphaUnder :: P Char alphaUnder = satisfy' "alphaUnder" (\c -> isAlpha c || c=='_') "letter or underscore" alphaUnderNum :: P Char alphaUnderNum = satisfy' "alphaUnderNum" (\c -> isAlphaNum c || c=='_') "letter, number, or underscore" letter :: P Char letter = satisfy' "alpha" isAlpha "letter" digit :: P Char digit = satisfy' "digit" isDigit "digit" hexDigit :: P Char hexDigit = satisfy' "hexDigit" isHexDigit "hexadecimal digit" octDigit :: P Char octDigit = satisfy' "octDigit" isOctDigit "octal digit" char :: Char -> P Char char c = satisfy' ("char: "++show c) (==c) show [c] anyChar :: P Char anyChar = satisfy' "anyChar" (const True) satisfy' :: String -> (Char -> Bool) -> P Char -- satisfy' m f = satisfy'' True $ trace m f satisfy' _ = satisfy'' False -- |This is where all the real work is done... we just make sure -- to always call everything in terms of @satisfy@ now. -- This seems to be a bit broken... I think we need to read -- the @Ctl@ tokens immediately along with anything else, so that -- @Consumed@ will be accurate... -- The other option, I guess, would be to use a type -- @data MChar = MChar [Control] Char@ -- and then just stack the control codes on either the space or else -- the next eligible letter. satisfy = satisfy'' False -- This is for debugging... satisfy'' :: Bool -> (Char -> Bool) -> P Char satisfy'' v f = do ip <- incPos `fmap` getState let update = if ip then updatePosChar else const c <- tokenPrim showToken (nextpos update) test unless (isBlank c) $ modify $ \s -> s { aliasOK = False } runCtls v return c where showToken (Chr c) = show c nextpos u p (Chr c) _ = u p c test (Chr c) = if f c then Just c else Nothing runCtls :: Bool -> P () runCtls v = getInput >>= run >>= setInput where run [] = return [] run (Ctl a:xs) = act a >> run xs run xs = return xs act (AliasOn b) = modify $ t "AliasOn" b $ \s -> s { aliasOK = b } act (Aliases as) = modify $ t "Aliases" as $ \s -> s { aliases = as } act (IncPos b) = modify $ t "IncPos" b $ \s -> s { incPos = b } t s x = if v then trace (s++": "++show x) else id -- From the source, it appears the state gets threaded through <|> correctly. -- i.e. (setState ... >> fail ...) <|> (return ()) -- -> doesn't change the state (since that's bound up with reading) aliasOn :: P () aliasOn = modify $ \s -> s { aliasOK = True } string :: String -> P String string [] = return [] string (c:cs) = do c <- char c fmap (c:) $ string cs -- errors should work correctly... schar :: Char -> P Char schar c = do x <- char c spaces return x -- *More general functions assocL :: P a -> P (b -> a -> b) -> (a -> b) -> P b assocL p op single = do x <- p rest $ single x where rest x = do f <- op y <- p rest (f x y) <|> return x getInput' :: P String getInput' = do ts <- getInput return $ concatMap f ts where f (Chr c) = [c] f _ = [] tok :: Char -> String tok c | c `elem` "\n\r" = "newline" | otherwise = [c] -- |Parse spaces afterwards token :: P a -> P a token p = do p' <- p spaces return p' unexpectedToken :: P a unexpectedToken = do s <- getInput' when (null s) $ err '\n' err (head s) where err c = fatal $ "syntax error near unexpected token `"++tok c++"'" putBack :: Char -> P () --putBack c = setInput =<< ((Chr c:) `fmap` getInput) putBack c = do i <- getInput setInput $ Chr c:trace ("putting back a "++[c]++": "++show i) i -- |This version allows a newline/eof without being fatal. unexpected :: P a unexpected = (anyChar >>= putBack >> unexpectedToken) <|> fail "" unexpectedNoEOF :: P a unexpectedNoEOF = unexpected