{-# LANGUAGE RecordWildCards, NamedFieldPuns #-}
module Regex.Genex.Pure (genexPure) where
import Control.Monad.Logic.Class (MonadLogic(..))
import qualified Data.Text as T
import qualified Data.IntSet as IntSet
import qualified Data.Set as Set
import Data.List (intersect, (\\))
import Control.Monad
import Control.Monad.Stream
import Regex.Genex.Normalize (normalize)
import Debug.Trace
import Text.Regex.TDFA.Pattern
import Text.Regex.TDFA.ReadRegex (parseRegex)
import Control.Monad.State
import Control.Applicative

parse :: String -> Pattern
parse r = case parseRegex r of
    Right (pattern, _) -> pattern
    Left x -> error $ show x

genexPure :: [String] -> [String]
genexPure = map T.unpack . foldl1 intersect . map (toList . run . normalize IntSet.empty . parse)

maxRepeat :: Int
maxRepeat = 3

each = foldl1 (<|>) . map return

run :: Pattern -> Stream T.Text
run p = case p of
    PChar{..} -> isChar getPatternChar
    PAny {getPatternSet = PatternSet (Just cset) _ _ _} -> each $ map T.singleton $ Set.toList cset
    PQuest p -> pure T.empty <|> run p
    PPlus p -> run $ PBound 1 Nothing p
    PStar _ p -> run $ PBound 0 Nothing p
    PBound low high p -> do
        n <- each [low..maybe (low+maxRepeat) id high]
        fmap T.concat . sequence $ replicate n (run p) 
    PConcat ps -> fmap T.concat . suspended . sequence $ map run ps
    POr xs -> foldl1 mplus $ map run xs
    PDot{} -> notChars []
    PEscape {..} -> case getPatternChar of
        'n' -> isChar '\n'
        't' -> isChar '\t'
        'r' -> isChar '\r'
        'f' -> isChar '\f'
        'a' -> isChar '\a'
        'e' -> isChar '\ESC'
        'd' -> chars $ ['0'..'9']
        'w' -> chars $ ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z']
        's' -> chars "\9\10\12\13\32"
        'W' -> notChars $ ['0'..'9']
        'S' -> notChars $ ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z']
        'D' -> notChars "\9\10\12\13\32"
        ch  -> isChar ch
    _      -> error $ show p
    where
    isChar = return . T.singleton
    chars = each . map T.singleton
    notChars = chars . ([' '..'~'] \\)