{-# LANGUAGE CPP         #-}
#if __GLASGOW_HASKELL__ >=710
{-# LANGUAGE Safe        #-}
#elif __GLASGOW_HASKELL__ >=702
{-# LANGUAGE Trustworthy #-}
#endif
-- | Charactor classes.
module RERE.CharClasses (
    CharClasses,
    charClasses,
    classOfChar,
    ) where

import RERE.Type

import qualified Data.Set     as Set
import qualified RERE.CharSet as CS

-- | Character classes are represented by partition lower bounds.
type CharClasses = Set.Set Char

-- | Character classes.
--
-- We can partition 'Char' so characters in each part,
-- affect the given regular expression in the same way.
--
-- If we do some kind of memoising, we can map all characters
-- to 'classOfChar', making everything smaller.
--
charClasses :: RE a -> CharClasses
charClasses :: forall a. RE a -> CharClasses
charClasses = [CharSet] -> CharClasses
charsetClasses forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Set a -> [a]
Set.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. RE a -> Set CharSet
collect

-- | Map char to the representer of a class.
classOfChar :: CharClasses -> Char -> Char
#if MIN_VERSION_containers(0,5,0)
classOfChar :: CharClasses -> Char -> Char
classOfChar CharClasses
cc Char
c = case forall a. Ord a => a -> Set a -> Maybe a
Set.lookupLE Char
c CharClasses
cc of
    Just Char
c' -> Char
c'
    Maybe Char
Nothing -> Char
'\NUL'
#else
-- old containers: slow path
classOfChar _ c = c
#endif

collect :: RE a -> Set.Set CS.CharSet
collect :: forall a. RE a -> Set CharSet
collect = forall a. RE a -> Set CharSet
go where
    go :: RE a -> Set.Set CS.CharSet
    go :: forall a. RE a -> Set CharSet
go RE a
Null        = forall a. Set a
Set.empty
    go RE a
Full        = forall a. Set a
Set.empty
    go RE a
Eps         = forall a. Set a
Set.empty
    go (Ch CharSet
cs)     = forall a. a -> Set a
Set.singleton CharSet
cs
    go (App RE a
r RE a
s)   = forall a. Ord a => Set a -> Set a -> Set a
Set.union (forall a. RE a -> Set CharSet
go RE a
r) (forall a. RE a -> Set CharSet
go RE a
s)
    go (Alt RE a
r RE a
s)   = forall a. Ord a => Set a -> Set a -> Set a
Set.union (forall a. RE a -> Set CharSet
go RE a
r) (forall a. RE a -> Set CharSet
go RE a
s)
    go (Star RE a
r)    = forall a. RE a -> Set CharSet
go RE a
r
#ifdef RERE_INTERSECTION
    go (And r s)   = Set.union (go r) (go s)
#endif
    go (Var a
_)     = forall a. Set a
Set.empty
    go (Let Name
_ RE a
r RE (Var a)
s) = forall a. Ord a => Set a -> Set a -> Set a
Set.union (forall a. RE a -> Set CharSet
go RE a
r) (forall a. RE a -> Set CharSet
go RE (Var a)
s)
    go (Fix Name
_ RE (Var a)
r)   = forall a. RE a -> Set CharSet
go RE (Var a)
r

charsetClasses :: [CS.CharSet] -> CharClasses
charsetClasses :: [CharSet] -> CharClasses
charsetClasses = CharClasses -> [CharSet] -> CharClasses
go (forall a. a -> Set a
Set.singleton Char
'\NUL') where
    go :: CharClasses -> [CharSet] -> CharClasses
go CharClasses
acc []       = CharClasses
acc
    go CharClasses
acc (CharSet
cs:[CharSet]
css) = CharClasses -> [CharSet] -> CharClasses
go
        (forall a. Ord a => Set a -> Set a -> Set a
Set.union CharClasses
acc forall a b. (a -> b) -> a -> b
$ forall a. Ord a => [a] -> Set a
Set.fromList forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap forall {a}. (Eq a, Bounded a, Enum a) => (a, a) -> [a]
bounds forall a b. (a -> b) -> a -> b
$ CharSet -> [(Char, Char)]
CS.toIntervalList CharSet
cs)
        [CharSet]
css

    bounds :: (a, a) -> [a]
bounds (a
x, a
y) | a
y forall a. Eq a => a -> a -> Bool
== forall a. Bounded a => a
maxBound = [a
x]
                  | Bool
otherwise     = [a
x, forall a. Enum a => a -> a
succ a
y]