-- ParseRegexStr.hs -- -- Author: Yoshikuni Jujo -- module Hidden.ParseRegexStr ( RegexAction(..) , parseRegexStr ) where import Hidden.RegexPRTypes ( RegexSrcParser, runRegexSrcParser, RegexAction(..), getBR, modifyBR, setMode, isModeI, isModeM, isModeX ) import Hidden.ParseLib ( runParse, spot, mplus, mzero, token, tokens, list, greedyList, optional, endOfInput ) import Hidden.Tools ( isSymbol, isBit7On, bifurcate, cat2funcL, modifySnd, skipRet, (>..>) ) import Data.Char ( isAlphaNum, isAlpha, isDigit, isSpace, toUpper, toLower ) import Data.Ix ( inRange ) import Hidden.SrcRegActList( plusesList, oneCharList, backSlashesList, parensesList, charClassList ) parseRegexStr :: String -> [RegexAction] parseRegexStr = fst . fst . head . (runParse $ runRegexSrcParser parseRegexStrParser) . (,) [] parseRegexStrParser, parseTokensOr, parseTokens :: RegexSrcParser [RegexAction] parseRegexStrParser = parseTokensOr >>= endOfInput parseTokensOr = parseTokens `mplus` do { ra1 <- parseTokens; token '|'; ra2 <- parseTokensOr; return $ [ RegexOr ra1 ra2 ] } parseTokens = greedyList parseTokenPlus parseTokenPlus, parseToken, parseAlphaNum :: RegexSrcParser RegexAction parseTokenPlus = do ra <- parseToken plus <- parsePluses plusesList `mplus` parseQuantifier return $ plus ra parseQuantifier :: RegexSrcParser (RegexAction -> RegexAction) parseQuantifier = do { token '{'; mn <- list $ spot isDigit; mx <- do { cma <- optional $ token ','; case cma of "" -> return Nothing _ -> list (spot isDigit) >>= return . Just }; token '}'; nd <- optional (token '?') >>= return . null; return $ (if nd then Repeat else RepeatNotGreedy) (read mn) $ case mx of Nothing -> Just $ read mn Just "" -> Nothing Just n -> Just $ read n } parseToken = parseAlphaNum `mplus` ( token '\\' >> spot isSymbol >>= return . Select . (==) ) `mplus` do { token '\\'; optional (token '{'); d1 <- spot isDigit; d2 <- greedyList (spot isDigit); optional (token '}'); return $ BackReference $ read $ d1:d2 } `mplus` do { token '['; isNotNot <- optional (token '^') >>= return . null; cl <- parseCharList; token ']'; return $ Select $ (if isNotNot then id else (not.)) cl } `mplus` do { i <- getBR; token '('; modifyBR (+1); ras <- parseTokensOr; token ')'; return $ Note i ras } `mplus` do { tokens "(?"; list parseMode >>= mapM_ (uncurry setMode); token ')'; return NopRegex } `mplus` do { tokens "(?"; modes <- list parseMode; token ':'; flip mapM_ modes $ uncurry setMode; ras <- parseTokensOr; flip mapM_ modes $ uncurry setMode . modifySnd not; token ')'; return $ Parens ras } `mplus` do { m <- isModeM; if m then token '.' >> return (Select $ const True) else mzero } `mplus` do { x <- isModeX; if x then parseTokenX else mzero } `mplus` parseOneChar oneCharList `mplus` parseBackSlashes backSlashesList `mplus` parseParenses parensesList `mplus` ( spot isBit7On >>= return . Select . (==) ) `mplus` do { tokens "(?#"; c <- list $ spot (/=')'); token ')'; return $ Comment c } parseAlphaNum = do { i <- isModeI; spot isAlpha >>= return . Select . if i then flip elem . bifurcate (cat2funcL toUpper toLower) else (==) } `mplus` ( spot isDigit >>= return . Select . (==) ) parseMode :: RegexSrcParser (Char, Bool) parseMode = do b <- optional (token '-') >>= return . null spot (flip elem "imx") >>= return . flip (,) b parseTokenX :: RegexSrcParser RegexAction parseTokenX = ( spot isSpace >> return NopRegex ) `mplus` ( token '#' >> list (spot (/='\n')) >>= skipRet (token '\n' `mplus` endOfInput '\n') >>= return . Comment ) parsePluses :: [ (String, RegexAction -> RegexAction) ] -> RegexSrcParser (RegexAction -> RegexAction) parsePluses = foldr mplus mzero . map (\(t, act) -> tokens t >> return act) parseOneChar :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction parseOneChar = foldr mplus mzero . map (\(t, act) -> token t >> return act) parseBackSlashes :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction parseBackSlashes = foldr mplus mzero . map (\(t, act) -> tokens ['\\', t] >> return act) parseParenses :: [ (String, [RegexAction] -> RegexAction) ] -> RegexSrcParser RegexAction parseParenses = foldr mplus mzero . map ( \(t, act) -> tokens ('(':t) >> parseTokensOr >>= skipRet (token ')') >>= return . act ) parseCharList :: RegexSrcParser (Char -> Bool) parseCharList = do cl <- greedyList (parseChar `mplus` parseCharArea `mplus` parseCharClass); return $ or . zipWith ($) cl . repeat where parseChar = (spot isAlphaNum `mplus` (token '\\' >> spot isSymbol)) >>= return . (==) parseCharArea = (spot isAlphaNum >>= skipRet (token '-')) >..> spot isAlphaNum >>= return . inRange parseCharClass = foldr mplus mzero $ map (\(s, p) -> tokens ("[:"++s++":]") >> return p) charClassList