--	ParseRegexStr.hs
--
--	Author: Yoshikuni Jujo <PAF01143@nifty.ne.jp>
--

module Hidden.ParseRegexStr (
  RegexAction(..)
, parseRegexStr
) where

import Hidden.RegexPRTypes ( RegexAction(..),
                             RegexSrcParser, runRegexSrcParser,
			     getBR, modifyBR,
			     setMode, setModes, getModes,
			     isModeI, isModeM, isModeX )
import Text.ParserCombinators.MTLParse
                           ( runParse, spot, token, tokens, mzero, mplus,
                             still, parseNot, endOfInput, MonadParse,
			     MonadPlus,
                             list, neList, greedyNeList, optional )
import Hidden.Tools	   ( isSymbol, ignoreCase, skipRet, (>..>), ifM,
                             applyIf, (&&&), headOrErr, modifyFst )
import Data.Char	   ( isAlphaNum, isDigit, isSpace )
import Data.Ix             ( inRange )
import Hidden.SrcRegActList( selfTest, oneCharList, backSlashesList, plusesList,
                             parensesList, charClassList )
import Control.Applicative ((<$>))

parseRegexStr :: String -> [RegexAction]
parseRegexStr src =
  fst . fst . headOrErr ("parse error: regex " ++ show src ++ " is uncorrect") .
    (runParse $ runRegexSrcParser parseRegexStrParser) . (,) [] $ src

parseRegexStrParser, parseTokensOr, parseTokens :: RegexSrcParser [RegexAction]
parseRegexStrParser = parseTokensOr >>= endOfInput
parseTokensOr = parseTokens
		`mplus`
                do { ra1 <- parseTokens; token '|'; ra2 <- parseTokensOr;
		     return $ [ RegexOr ra1 ra2 ] }
parseTokens = list parseTokenPlus

parseTokenPlus, parseToken :: RegexSrcParser RegexAction
parseTokenPlus = do ra   <- parseToken
                    plus <- parsePluses plusesList `mplus` parseQuantifier
		    return $ plus ra
parseQuantifier :: RegexSrcParser (RegexAction -> RegexAction)
parseQuantifier
  = do { token '{';
         mn <- neList $ spot isDigit;
         mx <- do { cma <- optional $ token ',';
	            case cma of
		         "" -> return Nothing
			 _  -> list (spot isDigit) >>= return . Just };
         token '}';
	 nd <- optional (token '?') >>= return . null;
         return $ (if nd then Repeat else RepeatNotGreedy) (read mn) $
	                            case mx of
	                                 Nothing -> Just $ read mn
					 Just "" -> Nothing
					 Just n  -> Just $ read n }

parseToken
  = ifM isModeX parseTokenX mzero
    `mplus`
    ( isModeI >>= \ic ->
       spot selfTest >>= return . Select . applyIf ic ignoreCase . (==) )
    `mplus`
    parseOpenBrace
    `mplus`
    ( flip (ifM isModeM) mzero $ token '.' >> return (Select $ const True) )
    `mplus`
    ( token '\\' >> spot isSymbol >>= return . Select . (==) )
    `mplus`
    parseBackReference
    `mplus`
    ( token '[' >> optional (token '^') >>= return . not . null >>= \isNot ->
      parseCharList >>= skipRet (token ']') >>=
      return . Select . applyIf isNot (not.) )
    `mplus`
    ( getBR >>= \i -> token '(' >> modifyBR (+1) >> parseTokensOr
            >>= skipRet (token ')') >>= return . Note i )
    `mplus`
    ( tokens "(?" >> list parseMode >>= mapM_ (uncurry setMode) >> token ')'
                  >> return NopRegex )
    `mplus`
    ( getModes >>= \preModes ->
      tokens "(?" >> list parseMode >>= mapM_ (uncurry setMode) >> token ':' >>
      parseTokensOr >>= skipRet (setModes preModes >> token ')')
                    >>= return . Parens )
    `mplus`
    parseOneChar oneCharList
    `mplus`
    parseBackSlashes backSlashesList
    `mplus`
    parseParenses parensesList
    `mplus`
    ( tokens "(?#" >> list (spot (/=')')) >>= skipRet (token ')') >>=
      return . Comment )

parseMode :: RegexSrcParser (Char, Bool)
parseMode = optional (token '-') >..> spot (flip elem "imx")
              >>= return . uncurry (flip (,)) . modifyFst null

parseTokenX :: RegexSrcParser RegexAction
parseTokenX
  = ( spot isSpace >> return NopRegex ) `mplus`
    ( token '#' >> list (spot (/='\n')) >>=
      skipRet (token '\n' `mplus` endOfInput '\n') >>= return . Comment )

parsePluses ::
  [ (String, RegexAction -> RegexAction) ] ->
				RegexSrcParser (RegexAction -> RegexAction)
parsePluses = concatMapParse (\(t, act) -> tokens t >> return act)

parseOneChar :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction
parseOneChar
  = concatMapParse (\(t, act) -> token t >> return act)

parseBackSlashes :: [ (Char, RegexAction) ] -> RegexSrcParser RegexAction
parseBackSlashes
  = concatMapParse (\(t, act) -> tokens ['\\', t] >> return act)

parseParenses ::
  [ (String, [RegexAction] -> RegexAction) ] -> RegexSrcParser RegexAction
parseParenses
  = concatMapParse ( \(t, act) ->
      tokens ('(':t) >> parseTokensOr >>= skipRet (token ')') >>= return . act )

parseCharList :: RegexSrcParser (Char -> Bool)
parseCharList = do
  modei <- isModeI
  cl1 <- parseOne `mplus` (concatMapParse ((>>= return . (==)) . token) "-]")
  cl2 <- list $ parseOne `mplus` (token '^' >>= return . (==))
  return $ applyIf modei ignoreCase $ or . zipWith ($) (cl1 : cl2) . repeat
  where parseOne       = (parseChar >>= return . (==)) `mplus` parseCharArea
                                                       `mplus` parseCharClass
        parseChar      = ( spot isAlphaNum )                       `mplus`
		         ( token '\\' >> spot isSymbol )           `mplus`
			 ( spot $ selfTest &&& flip notElem "-]" ) `mplus`
			 ( spot $ flip elem ".+$" )                `mplus`
			 ( token '[' >>= skipRet (still $ parseNot ()
			                                $ token ':') )
        parseCharArea  = (parseChar >>= skipRet (token '-')) >..> parseChar
	                   >>= return . inRange
	parseCharClass = concatMapParse
	                   (\(s, p) -> tokens ("[:"++s++":]") >> return p)
			   charClassList

concatMapParse :: MonadPlus m => (b -> m a) -> [b] -> m a
concatMapParse f = foldr mplus mzero . map f

parseOpenBrace :: RegexSrcParser RegexAction
parseOpenBrace = do still $ parseNot () $ parseQuantifier
                    still $ parseNot () $ parseBackReference
                    ret <- token '{'
                    return $ Select (==ret)

parseBackReference :: RegexSrcParser RegexAction
parseBackReference = do
  brace <- null <$> (optional $ token '{')
  token '\\'
  dgt <- greedyNeList (spot isDigit)
  if brace then return ' ' else token '}'
  return $ BackReference $ read dgt