-- ParseRegexStr.hs -- -- Author: Yoshikuni Jujo -- module Hidden.ParseRegexStr ( RegexAction(..) , parseRegexStr ) where import Hidden.RegexPRTypes ( RegexSrcParser, RegexAction(..), reverseRegexAction, getBR, modifyBR, isModeI, isModeM, isModeX, setMode, runRegexSrcParser ) import Hidden.ParseLib( runParse, spot, token, mplus, mzero, tokens, list, greedyList, optional, endOfInput ) import Control.Monad.State( liftM ) import Hidden.Tools ( isSymbol, (|||) ) import Data.Char ( isAlphaNum, isAlpha, isDigit, isSpace, toUpper, toLower ) import Hidden.SrcRegActList ( plusesList, oneCharList, backSlashesList, parensesList, charClassList ) parseRegexStr :: String -> [RegexAction] parseRegexStr = fst . fst . head . (runParse $ runRegexSrcParser parseRegexStrParser) . (,) [] parseRegexStrParser, parseTokensOr, parseTokens :: RegexSrcParser [RegexAction] parseRegexStrParser = parseTokensOr >>= endOfInput parseTokensOr = parseTokens `mplus` do { ra1 <- parseTokens; token '|'; ra2 <- parseTokensOr; return $ [ RegexOr ra1 ra2 ] } parseTokens = greedyList parseTokenPlus parseTokenPlus, parseToken, parseAlpha :: RegexSrcParser RegexAction parseTokenPlus = do ra <- parseToken plus <- parsePluses plusesList `mplus` parseQuantifier return $ plus ra parseQuantifier :: RegexSrcParser (RegexAction -> RegexAction) parseQuantifier = do { token '{'; m <- list $ spot isDigit; max <- do { cma <- optional $ token ','; case cma of "" -> return Nothing _ -> list (spot isDigit) >>= return . Just; }; token '}'; nd <- optional (token '?') >>= return . null; return $ (if nd then Repeat else RepeatNotGreedy) (read m) $ case max of Nothing -> Just $ read m Just "" -> Nothing Just n -> Just $ read n } parseToken = do parseAlpha `mplus` do { c <- spot isDigit; return $ Select (==c) } `mplus` do { token '\\'; c <- spot isSymbol; return $ Select (==c) } `mplus` do { token '\\'; d1 <- spot isDigit; d2 <- greedyList $ spot isDigit; return $ BackReference $ read $ d1:d2 } `mplus` do { token '['; isNotNot <- optional (token '^') >>= return . null; cl <- parseCharList; token ']'; return $ Select $ (if isNotNot then id else (not.)) cl } `mplus` do { i <- getBR; token '('; modifyBR (+1); ras <- parseTokensOr; token ')'; return $ Note i ras } `mplus` do { tokens "(?"; list parseMode >>= mapM_ (uncurry setMode); token ')'; return NopRegex } `mplus` do { tokens "(?"; modes <- list parseMode; mapM_ (uncurry setMode) modes; token ':'; ras <- parseTokensOr; mapM_ (\(m,b) -> setMode m $ not b) modes; token ')'; return $ Parens ras } `mplus` do { m <- isModeM; if m then token '.' >> return (Select $ const True) else mzero } `mplus` do { x <- isModeX; if x then parseTokenX else mzero } `mplus` parseOneChar oneCharList `mplus` parseBackSlashes backSlashesList `mplus` parseParenses parensesList `mplus` do { tokens "(?#"; c <- list $ spot (/=')'); token ')'; return $ Comment c } parseAlpha = do i <- isModeI if i then do { c <- spot isAlpha; return $ Select $ flip elem $ [toUpper c, toLower c] } else do { c <- spot isAlpha; return $ Select (==c) } parseMode :: RegexSrcParser (Char, Bool) parseMode = do b <- optional (token '-') >>= return . null spot (flip elem "imx") >>= return . flip (,) b parseTokenX :: RegexSrcParser RegexAction parseTokenX = do { spot isSpace; return NopRegex } `mplus` do { token '#'; c <- list $ spot (/='\n'); token '\n'; return $ Comment c } parsePluses :: [ (String, RegexAction -> RegexAction) ] -> RegexSrcParser (RegexAction -> RegexAction) parsePluses = foldr mplus mzero . map (\(t, act) -> tokens t >> return act) parseOneChar :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction parseOneChar = foldr mplus mzero . map (\(t, act) -> token t >> return act) parseBackSlashes :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction parseBackSlashes = foldr mplus mzero . map (\(t, act) -> tokens ['\\', t] >> return act) parseParenses :: [ (String, [RegexAction] -> RegexAction) ] -> RegexSrcParser RegexAction parseParenses = foldr mplus mzero . map (\(t, act) -> do tokens ('(':t) ras <- parseTokensOr token ')' return $ act ras) parseCharList :: RegexSrcParser (Char -> Bool) parseCharList = do cl <- greedyList (parseChar `mplus` parseCharArea `mplus` parseCharClass) return $ \c -> or $ zipWith ($) cl (repeat c) where parseChar = (spot isAlphaNum `mplus` (token '\\' >> spot isSymbol)) >>= return . (==) parseCharArea = do { b <- spot isAlphaNum; token '-'; e <- spot isAlphaNum; return $ flip elem [b..e] } parseCharClass = foldr mplus mzero $ map (\(s, p) -> tokens ("[:"++s++":]") >> return p) charClassList -- tokens "[:alnum:]" >> return isAlphaNum