module Text.RegExp.Data where
import Data.Semiring
newtype RegExp c = RegExp (forall w . Semiring w => RegW w c)
data RegW w c = RegW { active :: !Bool,
empty :: !w,
final_ :: !w,
reg :: !(Reg w c) }
final :: Semiring w => RegW w c -> w
final r = if active r then final_ r else zero
data Reg w c = Eps
| Sym String (c -> w)
| Alt (RegW w c) (RegW w c)
| Seq (RegW w c) (RegW w c)
| Rep (RegW w c)
class Semiring w => Weight a b w where
symWeight :: (a -> w) -> b -> w
defaultSymWeight :: (a -> w) -> a -> w
defaultSymWeight = id
instance Weight c c Bool where
symWeight = defaultSymWeight
instance Num a => Weight c c (Numeric a) where
symWeight = defaultSymWeight
weighted :: Weight a b w => RegW w a -> RegW w b
weighted (RegW a e f r) =
case r of
Eps -> RegW a e f Eps
Sym s p -> RegW a e f (Sym s (symWeight p))
Alt p q -> RegW a e f (Alt (weighted p) (weighted q))
Seq p q -> RegW a e f (Seq (weighted p) (weighted q))
Rep p -> RegW a e f (Rep (weighted p))
eps :: RegExp c
eps = RegExp epsW
epsW :: Semiring w => RegW w c
epsW = RegW False one zero Eps
char :: Char -> RegExp Char
char c = psym (quote c) (c==)
sym :: (Eq c, Show c) => c -> RegExp c
sym c = psym (show c) (c==)
quote :: Char -> String
quote c | c `elem` " \\|*+?.[]{}^" = '\\' : [c]
| otherwise = [c]
psym :: String -> (c -> Bool) -> RegExp c
psym s p = RegExp (symW s (fromBool . p))
symW :: Semiring w => String -> (c -> w) -> RegW w c
symW s p = RegW False zero zero $ Sym s p
anySym :: RegExp c
anySym = psym "." (const True)
noMatch :: RegExp c
noMatch = psym "[]" (const False)
alt :: RegExp c -> RegExp c -> RegExp c
alt (RegExp p) (RegExp q) =
RegExp (RegW False (empty p .+. empty q) zero (Alt p q))
altW :: Semiring w => RegW w c -> RegW w c -> RegW w c
altW p q = RegW (active p || active q)
(empty p .+. empty q)
(final p .+. final q)
(Alt p q)
seq_ :: RegExp c -> RegExp c -> RegExp c
seq_ (RegExp p) (RegExp q) =
RegExp (RegW False (empty p .*. empty q) zero (Seq p q))
seqW :: Semiring w => RegW w c -> RegW w c -> RegW w c
seqW p q = RegW (active p || active q)
(empty p .*. empty q)
(final p .*. empty q .+. final q)
(Seq p q)
rep :: RegExp c -> RegExp c
rep (RegExp r) = RegExp (RegW False one zero (Rep r))
repW :: Semiring w => RegW w c -> RegW w c
repW r = RegW (active r) one (final r) (Rep r)
rep1 :: RegExp c -> RegExp c
rep1 r = r `seq_` rep r
opt :: RegExp c -> RegExp c
opt r = eps `alt` r
brep :: (Int,Int) -> RegExp c -> RegExp c
brep (n,m) r
| n < 0 || m < 0 || n > m = error msg
| n == 0 && m == 0 = eps
| n == m = foldr1 seq_ (replicate n r)
| otherwise = foldr seq_ rest (replicate n r)
where
rest = foldr nestopt (opt r) (replicate (mn1) r)
nestopt p q = opt (seq_ p q)
msg = "Text.RegExp.brep: invalid repetition bounds: " ++ show (n,m)
regW :: Semiring w => RegExp c -> RegW w c
regW (RegExp r) = r
instance Show (RegExp Char) where
showsPrec n r = showsPrec n (regW r :: RegW Bool Char)
instance Show (RegW Bool Char) where
showsPrec n r = showsPrec n (reg r)
instance Show (Reg Bool Char) where
showsPrec _ Eps = showString "()"
showsPrec _ (Sym s _) = showString s
showsPrec n (Alt p q) = showParen (n > 0)
$ showsPrec 1 p
. showString "|"
. shows q
showsPrec n (Seq p q) = showParen (n > 1)
$ showsPrec 2 p
. showsPrec 1 q
showsPrec _ (Rep r) = showsPrec 2 r . showString "*"
instance Eq (RegExp Char) where
p == q = regW p == (regW q :: RegW Bool Char)
instance Eq (RegW Bool Char) where
p == q = reg p == reg q
instance Eq (Reg Bool Char) where
Eps == Eps = True
Sym s _ == Sym t _ = s==t
Alt a b == Alt c d = a==c && b==d
Seq a b == Seq c d = a==c && b==d
Rep a == Rep b = a==b
_ == _ = False