module Regex.Genex.Pure (genexPure) where
import qualified Data.Text as T
import qualified Data.IntSet as IntSet
import qualified Data.Set as Set
import Data.List (intersect, (\\))
import Control.Monad
import Control.Monad.Stream
import qualified Control.Monad.Stream as Stream
import Regex.Genex.Normalize (normalize)
import Debug.Trace
import Text.Regex.TDFA.Pattern
import Text.Regex.TDFA.ReadRegex (parseRegex)
import Control.Monad.State
import Control.Applicative
parse :: String -> Pattern
parse r = case parseRegex r of
Right (pattern, _) -> pattern
Left x -> error $ show x
genexPure :: [String] -> [String]
genexPure = map T.unpack . foldl1 intersect . map (Stream.runStream . run . normalize IntSet.empty . parse)
maxRepeat :: Int
maxRepeat = 10
each = foldl1 (<|>) . map return
run :: Pattern -> Stream T.Text
run p = case p of
PEmpty -> pure T.empty
PChar{..} -> isChar getPatternChar
PAny {getPatternSet = PatternSet (Just cset) _ _ _} -> each $ map T.singleton $ Set.toList cset
PAnyNot {getPatternSet = PatternSet (Just cset) _ _ _} -> chars $ notChars $ concatMap expandEscape $ Set.toList cset
PQuest p -> pure T.empty <|> run p
PPlus p -> run $ PBound 1 Nothing p
PStar _ p -> run $ PBound 0 Nothing p
PBound low high p -> do
n <- each [low..maybe (low+maxRepeat) id high]
fmap T.concat . sequence $ replicate n (run p)
PConcat ps -> fmap T.concat . suspended . sequence $ map run ps
POr xs -> foldl1 mplus $ map run xs
PDot{} -> chars $ notChars []
PEscape {..} -> chars $ expandEscape getPatternChar
_ -> error $ show p
where
isChar = return . T.singleton
chars = each . map T.singleton
notChars = ([' '..'~'] \\)
expandEscape ch = case ch of
'n' -> "\n"
't' -> "\t"
'r' -> "\r"
'f' -> "\f"
'a' -> "\a"
'e' -> "\ESC"
'd' -> ['0'..'9']
'w' -> ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z']
's' -> "\9\32"
'D' -> notChars $ ['0'..'9']
'W' -> notChars $ ['0'..'9'] ++ '_' : ['a'..'z'] ++ ['A'..'Z']
'S' -> notChars "\9\32"
ch -> [ch]