module Hidden.ParseRegexStr ( RegexAction(..) , parseRegexStr ) where import Hidden.RegexPRTypes ( RegexSrcParser, RegexAction(..), reverseRegexAction ) import Hidden.ParseLib ( runParse, spot, token, mplus, mzero, tokens, list, greedyList, endOfInput ) import Control.Monad.State( runStateT, get, modify, liftM ) import Hidden.Tools ( isSymbol ) import Data.Char ( isAlphaNum, isDigit ) import Hidden.SrcRegActList ( plusesList, backSlashesList, parensesList) parseRegexStr :: String -> [RegexAction] parseRegexStr = fst . fst . head . (runParse $ runStateT parseRegexStrParser 1) . (,) [] parseRegexStrParser, parseTokensOr, parseTokens :: RegexSrcParser [RegexAction] parseRegexStrParser = parseTokensOr >>= endOfInput parseTokensOr = parseTokens `mplus` do { ra1 <- parseTokens; token '|'; ra2 <- parseTokensOr; return $ [ RegexOr ra1 ra2 ] } parseTokens = greedyList parseTokenPlus parseTokenPlus, parseToken :: RegexSrcParser RegexAction parseTokenPlus = do ra <- parseToken plus <- parsePluses plusesList `mplus` parseQuantifier return $ plus ra parsePluses :: [ (String, RegexAction -> RegexAction) ] -> RegexSrcParser (RegexAction -> RegexAction) parsePluses = foldr mplus mzero . map (\(t, act) -> tokens t >> return act) parseQuantifier :: RegexSrcParser (RegexAction -> RegexAction) parseQuantifier = do { token '{'; m <- list (spot isDigit); token '}'; return $ Repeat (read m) (Just $ read m) } `mplus` do { token '{'; m <- list (spot isDigit); tokens ",}"; return $ Repeat (read m) Nothing } `mplus` do { token '{'; m <- list (spot isDigit); token ','; n <- list (spot isDigit); token '}'; return $ Repeat (read m) (Just $ read n) } `mplus` do { token '{'; m <- list (spot isDigit); tokens "}?"; return $ RepeatNotGreedy (read m) (Just $ read m) } `mplus` do { token '{'; m <- list (spot isDigit); tokens ",}?"; return $ RepeatNotGreedy (read m) Nothing } `mplus` do { token '{'; m <- list (spot isDigit); token ','; n <- list (spot isDigit); tokens "}?"; return $ RepeatNotGreedy (read m) (Just $ read n) } parseToken = do { token '.'; return $ Select (/='\n') } `mplus` do { token '$'; return $ RegexOr [EndOfInput] [Still [Select (=='\n')]] } `mplus` do { token '^'; return $ RegexOr [BeginningOfInput] [Still [Backword [Select (=='\n')]]] } `mplus` do { c <- spot isAlphaNum; return $ Select (==c) } `mplus` do { spot (==' '); return $ Select (==' ') } `mplus` do { spot (==','); return $ Select (==',') } `mplus` do { token '\\'; c <- spot isSymbol; return $ Select (==c) } `mplus` do { token '\\'; d1 <- spot isDigit; d2 <- greedyList $ spot isDigit; return $ BackReference $ read $ d1:d2 } `mplus` do { token '['; cl <- parseCharList; token ']'; return $ Select (flip elem cl) } `mplus` do { token '['; token '^'; cl <- parseCharList; token ']'; return $ Select (flip notElem cl) } `mplus` do { i <- get; token '('; modify (+1); ras <- parseTokensOr; token ')'; return $ Note i ras } `mplus` parseBackSlashes backSlashesList `mplus` parseParenses parensesList `mplus` do { tokens "(?#"; com <- list $ spot (/=')'); token ')'; return $ Comment com } parseBackSlashes :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction parseBackSlashes = foldr mplus mzero . map (\(t, act) -> tokens ['\\', t] >> return act) parseParenses :: [ (String, [RegexAction] -> RegexAction) ] -> RegexSrcParser RegexAction parseParenses = foldr mplus mzero . map (\(t, act) -> do tokens ('(':t) ras <- parseTokensOr token ')' return $ act ras) parseCharList :: RegexSrcParser String parseCharList = liftM concat $ greedyList ((parseChar >>= return . (:[])) `mplus` parseCharArea) where parseChar = spot isAlphaNum `mplus` ( token '\\' >> spot isSymbol ) parseCharArea = do { b <- parseChar; token '-'; e <- parseChar; return [b..e] }