--	ParseRegexStr.hs
--
--	Author: Yoshikuni Jujo <PAF01143@nifty.ne.jp>
--

module Hidden.ParseRegexStr (
  RegexAction(..)
, parseRegexStr
) where

import Hidden.RegexPRTypes ( RegexSrcParser, runRegexSrcParser,
                             RegexAction(..),
			     getBR, modifyBR,
			     setMode, isModeI, isModeM, isModeX )
import Hidden.ParseLib     ( runParse, spot, mplus, mzero, token, tokens,
                             list, greedyList, optional, endOfInput )
import Hidden.Tools	   ( isSymbol, isBit7On, bifurcate, cat2funcL,
                             modifySnd, skipRet, (>..>) )
import Data.Char	   ( isAlphaNum, isAlpha, isDigit, isSpace,
                             toUpper, toLower )
import Data.Ix             ( inRange )
import Hidden.SrcRegActList( plusesList, oneCharList, backSlashesList,
                             parensesList, charClassList, selfList )

parseRegexStr :: String -> [RegexAction]
parseRegexStr =
  fst . fst . head . (runParse $ runRegexSrcParser parseRegexStrParser) . (,) []

parseRegexStrParser, parseTokensOr, parseTokens :: RegexSrcParser [RegexAction]
parseRegexStrParser = parseTokensOr >>= endOfInput
parseTokensOr = parseTokens
		`mplus`
                do { ra1 <- parseTokens; token '|'; ra2 <- parseTokensOr;
		     return $ [ RegexOr ra1 ra2 ] }
parseTokens = greedyList parseTokenPlus

parseTokenPlus, parseToken, parseAlphaNum :: RegexSrcParser RegexAction
parseTokenPlus = do ra   <- parseToken
                    plus <- parsePluses plusesList `mplus` parseQuantifier
		    return $ plus ra
parseQuantifier :: RegexSrcParser (RegexAction -> RegexAction)
parseQuantifier
  = do { token '{';
         mn <- list $ spot isDigit;
         mx <- do { cma <- optional $ token ',';
	             case cma of
		          "" -> return Nothing
			  _  -> list (spot isDigit) >>= return . Just };
         token '}';
	 nd <- optional (token '?') >>= return . null;
         return $ (if nd then Repeat else RepeatNotGreedy) (read mn) $
	                            case mx of
	                                 Nothing -> Just $ read mn
					 Just "" -> Nothing
					 Just n  -> Just $ read n }

parseToken
  = parseAlphaNum
    `mplus`
    ( token '\\' >> spot isSymbol >>= return . Select . (==) )
    `mplus`
    do { token '\\'; optional (token '{');
         d1 <- spot isDigit; d2 <- greedyList (spot isDigit);
	 optional (token '}');
         return $ BackReference $ read $ d1:d2 }
    `mplus`
    do { token '['; isNotNot <- optional (token '^') >>= return . null;
         cl <- parseCharList; token ']';
         return $ Select $ (if isNotNot then id else (not.)) cl }
    `mplus`
    do { i <- getBR; token '('; modifyBR (+1); ras <- parseTokensOr; token ')';
         return $ Note i ras }
    `mplus`
    do { tokens "(?"; list parseMode >>= mapM_ (uncurry setMode); token ')';
	 return NopRegex }
    `mplus`
    do { tokens "(?"; modes <- list parseMode; token ':';
	 flip mapM_ modes $ uncurry setMode;
         ras <- parseTokensOr;
	 flip mapM_ modes $ uncurry setMode . modifySnd not; token ')';
	 return $ Parens ras }
    `mplus`
    do { m <- isModeM;
         if m then token '.' >> return (Select $ const True) else mzero }
    `mplus`
    do { x <- isModeX; if x then parseTokenX else mzero }
    `mplus`
    parseOneChar oneCharList
    `mplus`
    ( foldr mplus mzero $ map ((>>= return . Select . (==)) . token) selfList )
    `mplus`
    parseBackSlashes backSlashesList
    `mplus`
    parseParenses parensesList
    `mplus`
    ( spot isBit7On >>= return . Select . (==) )
    `mplus`
    do { tokens "(?#"; c <- list $ spot (/=')'); token ')'; return $ Comment c }

parseAlphaNum = do
  { i <- isModeI;
    spot isAlpha >>= return . Select .
      if i then flip elem . bifurcate (cat2funcL toUpper toLower)
           else (==) }
  `mplus` ( spot isDigit >>= return . Select . (==) )

parseMode :: RegexSrcParser (Char, Bool)
parseMode = do b <- optional (token '-') >>= return . null
               spot (flip elem "imx") >>= return . flip (,) b

parseTokenX :: RegexSrcParser RegexAction
parseTokenX = ( spot isSpace >> return NopRegex ) `mplus`
	      ( token '#' >> list (spot (/='\n')) >>=
	        skipRet (token '\n' `mplus` endOfInput '\n') >>=
	        return . Comment )

parsePluses ::
  [ (String, RegexAction -> RegexAction) ] ->
				RegexSrcParser (RegexAction -> RegexAction)
parsePluses = foldr mplus mzero . map (\(t, act) -> tokens t >> return act)

parseOneChar :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction
parseOneChar
  = foldr mplus mzero . map (\(t, act) -> token t >> return act)

parseBackSlashes :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction
parseBackSlashes
  = foldr mplus mzero . map (\(t, act) -> tokens ['\\', t] >> return act)

parseParenses ::
  [ (String, [RegexAction] -> RegexAction) ] -> RegexSrcParser RegexAction
parseParenses
  = foldr mplus mzero . map ( \(t, act) ->
      tokens ('(':t) >> parseTokensOr >>= skipRet (token ')') >>= return . act )

parseCharList :: RegexSrcParser (Char -> Bool)
parseCharList = do
  cl <- greedyList (parseChar `mplus` parseCharArea `mplus` parseCharClass);
  return $ or . zipWith ($) cl . repeat
  where parseChar = (spot isAlphaNum `mplus` (token '\\' >> spot isSymbol))
                      >>= return . (==)
        parseCharArea = (spot isAlphaNum >>= skipRet (token '-')) >..>
	                 spot isAlphaNum >>= return . inRange
	parseCharClass = foldr mplus mzero $
	                   map (\(s, p) -> tokens ("[:"++s++":]") >> return p)
			       charClassList