{-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >=710 {-# LANGUAGE Safe #-} #elif __GLASGOW_HASKELL__ >=702 {-# LANGUAGE Trustworthy #-} #endif -- | Charactor classes. module RERE.CharClasses ( CharClasses, charClasses, classOfChar, ) where import RERE.Type import qualified Data.Set as Set import qualified RERE.CharSet as CS -- | Character classes are represented by partition lower bounds. type CharClasses = Set.Set Char -- | Character classes. -- -- We can partition 'Char' so characters in each part, -- affect the given regular expression in the same way. -- -- If we do some kind of memoising, we can map all characters -- to 'classOfChar', making everything smaller. -- charClasses :: RE a -> CharClasses charClasses = charsetClasses . Set.toList . collect -- | Map char to the representer of a class. classOfChar :: CharClasses -> Char -> Char #if MIN_VERSION_containers(0,5,0) classOfChar cc c = case Set.lookupLE c cc of Just c' -> c' Nothing -> '\NUL' #else -- old containers: slow path classOfChar _ c = c #endif collect :: RE a -> Set.Set CS.CharSet collect = go where go :: RE a -> Set.Set CS.CharSet go Null = Set.empty go Full = Set.empty go Eps = Set.empty go (Ch cs) = Set.singleton cs go (App r s) = Set.union (go r) (go s) go (Alt r s) = Set.union (go r) (go s) go (Star r) = go r #ifdef RERE_INTERSECTION go (And r s) = Set.union (go r) (go s) #endif go (Var _) = Set.empty go (Let _ r s) = Set.union (go r) (go s) go (Fix _ r) = go r charsetClasses :: [CS.CharSet] -> CharClasses charsetClasses = go (Set.singleton '\NUL') where go acc [] = acc go acc (cs:css) = go (Set.union acc $ Set.fromList $ concatMap bounds $ CS.toIntervalList cs) css bounds (x, y) | y == maxBound = [x] | otherwise = [x, succ y]