module Hidden.ParseRegexStr (
RegexAction(..)
, parseRegexStr
) where
import Hidden.RegexPRTypes ( RegexSrcParser, runRegexSrcParser,
RegexAction(..),
getBR, modifyBR,
setMode, isModeI, isModeM, isModeX )
import Hidden.ParseLib ( runParse, spot, mplus, mzero, token, tokens,
list, greedyList, optional, endOfInput )
import Hidden.Tools ( isSymbol, isBit7On,
modifySnd, skipRet, (>..>), ignoreCase )
import Data.Char ( isAlphaNum, isAlpha, isDigit, isSpace,
)
import Data.Ix ( inRange )
import Hidden.SrcRegActList( plusesList, oneCharList, backSlashesList,
parensesList, charClassList, selfList )
parseRegexStr :: String -> [RegexAction]
parseRegexStr =
fst . fst . head . (runParse $ runRegexSrcParser parseRegexStrParser) . (,) []
parseRegexStrParser, parseTokensOr, parseTokens :: RegexSrcParser [RegexAction]
parseRegexStrParser = parseTokensOr >>= endOfInput
parseTokensOr = parseTokens
`mplus`
do { ra1 <- parseTokens; token '|'; ra2 <- parseTokensOr;
return $ [ RegexOr ra1 ra2 ] }
parseTokens = greedyList parseTokenPlus
parseTokenPlus, parseToken, parseAlphaNum :: RegexSrcParser RegexAction
parseTokenPlus = do ra <- parseToken
plus <- parsePluses plusesList `mplus` parseQuantifier
return $ plus ra
parseQuantifier :: RegexSrcParser (RegexAction -> RegexAction)
parseQuantifier
= do { token '{';
mn <- list $ spot isDigit;
mx <- do { cma <- optional $ token ',';
case cma of
"" -> return Nothing
_ -> list (spot isDigit) >>= return . Just };
token '}';
nd <- optional (token '?') >>= return . null;
return $ (if nd then Repeat else RepeatNotGreedy) (read mn) $
case mx of
Nothing -> Just $ read mn
Just "" -> Nothing
Just n -> Just $ read n }
parseToken
= parseAlphaNum
`mplus`
( token '\\' >> spot isSymbol >>= return . Select . (==) )
`mplus`
do { token '\\'; optional (token '{');
d1 <- spot isDigit; d2 <- greedyList (spot isDigit);
optional (token '}');
return $ BackReference $ read $ d1:d2 }
`mplus`
do { token '['; isNotNot <- optional (token '^') >>= return . null;
cl <- parseCharList; token ']';
return $ Select $ (if isNotNot then id else (not.)) cl }
`mplus`
do { i <- getBR; token '('; modifyBR (+1); ras <- parseTokensOr; token ')';
return $ Note i ras }
`mplus`
do { tokens "(?"; list parseMode >>= mapM_ (uncurry setMode); token ')';
return NopRegex }
`mplus`
do { tokens "(?"; modes <- list parseMode; token ':';
flip mapM_ modes $ uncurry setMode;
ras <- parseTokensOr;
flip mapM_ modes $ uncurry setMode . modifySnd not; token ')';
return $ Parens ras }
`mplus`
do { m <- isModeM;
if m then token '.' >> return (Select $ const True) else mzero }
`mplus`
do { x <- isModeX; if x then parseTokenX else mzero }
`mplus`
parseOneChar oneCharList
`mplus`
( foldr mplus mzero $ map ((>>= return . Select . (==)) . token) selfList )
`mplus`
parseBackSlashes backSlashesList
`mplus`
parseParenses parensesList
`mplus`
( spot isBit7On >>= return . Select . (==) )
`mplus`
do { tokens "(?#"; c <- list $ spot (/=')'); token ')'; return $ Comment c }
parseAlphaNum = do
{ i <- isModeI;
spot isAlpha >>= return . Select .
if i then ignoreCase . (==)
else (==) }
`mplus` ( spot isDigit >>= return . Select . (==) )
parseMode :: RegexSrcParser (Char, Bool)
parseMode = do b <- optional (token '-') >>= return . null
spot (flip elem "imx") >>= return . flip (,) b
parseTokenX :: RegexSrcParser RegexAction
parseTokenX = ( spot isSpace >> return NopRegex ) `mplus`
( token '#' >> list (spot (/='\n')) >>=
skipRet (token '\n' `mplus` endOfInput '\n') >>=
return . Comment )
parsePluses ::
[ (String, RegexAction -> RegexAction) ] ->
RegexSrcParser (RegexAction -> RegexAction)
parsePluses = foldr mplus mzero . map (\(t, act) -> tokens t >> return act)
parseOneChar :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction
parseOneChar
= foldr mplus mzero . map (\(t, act) -> token t >> return act)
parseBackSlashes :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction
parseBackSlashes
= foldr mplus mzero . map (\(t, act) -> tokens ['\\', t] >> return act)
parseParenses ::
[ (String, [RegexAction] -> RegexAction) ] -> RegexSrcParser RegexAction
parseParenses
= foldr mplus mzero . map ( \(t, act) ->
tokens ('(':t) >> parseTokensOr >>= skipRet (token ')') >>= return . act )
parseCharList :: RegexSrcParser (Char -> Bool)
parseCharList = do
modei <- isModeI
cl1 <- parseOne `mplus`
(foldr mplus mzero $ map ((>>= return . (==)) . token) "[]")
cl2 <- greedyList $ parseOne `mplus`
(foldr mplus mzero $ map ((>>= return . (==)) . token) "^")
return $ (if modei then ignoreCase else id) $ or . zipWith ($) (cl1 : cl2)
. repeat
where parseOne = (parseChar >>= return . (==)) `mplus` parseCharArea
`mplus` parseCharClass
parseChar = ( spot isAlphaNum )
`mplus`
( token '\\' >> spot isSymbol )
`mplus`
( foldr mplus mzero $ map ((>>= return ) . token)
$ filter (/='-') selfList )
parseCharArea = (parseChar >>= skipRet (token '-')) >..> parseChar >>=
return . inRange
parseCharClass = foldr mplus mzero $
map (\(s, p) -> tokens ("[:"++s++":]") >> return p)
charClassList