module Regex.Genex.Pure (genexPure) where
import qualified Data.Text as T
import qualified Data.IntSet as IntSet
import qualified Data.Set as Set
import Data.List (intersect, (\\))
import Control.Monad
import Control.Monad.Omega
import Regex.Genex.Normalize (normalize)
import Text.Regex.TDFA.Pattern
import Text.Regex.TDFA.ReadRegex (parseRegex)

parse :: String -> Pattern
parse r = case parseRegex r of
    Right (pattern, _) -> pattern
    Left x -> error $ show x

genexPure :: [String] -> [String]
genexPure = map T.unpack . foldl1 intersect . map (runOmega . omega . normalize IntSet.empty . parse)

maxRepeat :: Int
maxRepeat = 3

omega :: Pattern -> Omega T.Text
omega p = case p of
    PChar{..} -> isChar getPatternChar
    PAny {getPatternSet = PatternSet (Just cset) _ _ _} -> each $ map T.singleton $ Set.toList cset
    PQuest p -> join $ each [return T.empty, omega p]
    PPlus p -> omega $ PBound 1 Nothing p
    PStar _ p -> omega $ PBound 0 Nothing p
    PBound low high p -> do
        p <- omega p
        n <- each [low..maybe (low+3) id high]
        return $ T.replicate n p
    PConcat ps -> fmap T.concat . sequence $ map omega ps
    POr xs -> foldl1 mplus $ map omega xs
    PDot{} -> notChars []
    PEscape {..} -> case getPatternChar of
        'n' -> isChar '\n'
        't' -> isChar '\t'
        'r' -> isChar '\r'
        'f' -> isChar '\f'
        'a' -> isChar '\a'
        'e' -> isChar '\ESC'
        'd' -> chars $ ['0'..'9']
        'w' -> chars $ ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z']
        's' -> chars "\9\10\12\13\32"
        'W' -> notChars $ ['0'..'9']
        'S' -> notChars $ ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z']
        'D' -> notChars "\9\10\12\13\32"
        ch  -> isChar ch
    _      -> error $ show p
    where
    isChar :: Char -> Omega T.Text
    isChar = return . T.singleton
    chars = each . map T.singleton
    notChars = chars . ([' '..'~'] \\)