module GF.Speech.RegExp (RE(..), epsilonRE, nullRE, isEpsilon, isNull, unionRE, concatRE, seqRE, repeatRE, minimizeRE, mapRE, mapRE', joinRE, symbolsRE, dfa2re, prRE) where import Data.List import GF.Data.Utilities import GF.Speech.FiniteState data RE a = REUnion [RE a] -- ^ REUnion [] is null | REConcat [RE a] -- ^ REConcat [] is epsilon | RERepeat (RE a) | RESymbol a deriving (Eq,Ord,Show) dfa2re :: (Ord a) => DFA a -> RE a dfa2re = finalRE . elimStates . modifyTransitions merge . addLoops . oneFinalState () epsilonRE . mapTransitions RESymbol where addLoops fa = newTransitions [(s,s,nullRE) | (s,_) <- states fa] fa merge es = [(f,t,unionRE ls) | ((f,t),ls) <- buildMultiMap [((f,t),l) | (f,t,l) <- es]] elimStates :: (Ord a) => DFA (RE a) -> DFA (RE a) elimStates fa = case [s | (s,_) <- states fa, isInternal fa s] of [] -> fa sE:_ -> elimStates $ insertTransitionsWith (\x y -> unionRE [x,y]) ts $ removeState sE fa where sAs = nonLoopTransitionsTo sE fa sBs = nonLoopTransitionsFrom sE fa r2 = unionRE $ loops sE fa ts = [(sA, sB, r r1 r3) | (sA,r1) <- sAs, (sB,r3) <- sBs] r r1 r3 = concatRE [r1, repeatRE r2, r3] epsilonRE :: RE a epsilonRE = REConcat [] nullRE :: RE a nullRE = REUnion [] isNull :: RE a -> Bool isNull (REUnion []) = True isNull _ = False isEpsilon :: RE a -> Bool isEpsilon (REConcat []) = True isEpsilon _ = False unionRE :: Ord a => [RE a] -> RE a unionRE = unionOrId . sortNub . concatMap toList where toList (REUnion xs) = xs toList x = [x] unionOrId [r] = r unionOrId rs = REUnion rs concatRE :: [RE a] -> RE a concatRE xs | any isNull xs = nullRE | otherwise = case concatMap toList xs of [r] -> r rs -> REConcat rs where toList (REConcat xs) = xs toList x = [x] seqRE :: [a] -> RE a seqRE = concatRE . map RESymbol repeatRE :: RE a -> RE a repeatRE x | isNull x || isEpsilon x = epsilonRE | otherwise = RERepeat x finalRE :: Ord a => DFA (RE a) -> RE a finalRE fa = concatRE [repeatRE r1, r2, repeatRE (unionRE [r3, concatRE [r4, repeatRE r1, r2]])] where s0 = startState fa [sF] = finalStates fa r1 = unionRE $ loops s0 fa r2 = unionRE $ map snd $ nonLoopTransitionsTo sF fa r3 = unionRE $ loops sF fa r4 = unionRE $ map snd $ nonLoopTransitionsFrom sF fa reverseRE :: RE a -> RE a reverseRE (REConcat xs) = REConcat $ map reverseRE $ reverse xs reverseRE (REUnion xs) = REUnion (map reverseRE xs) reverseRE (RERepeat x) = RERepeat (reverseRE x) reverseRE x = x minimizeRE :: Ord a => RE a -> RE a minimizeRE = reverseRE . mergeForward . reverseRE . mergeForward mergeForward :: Ord a => RE a -> RE a mergeForward (REUnion xs) = unionRE [concatRE [mergeForward y,mergeForward (unionRE rs)] | (y,rs) <- buildMultiMap (map firstRE xs)] mergeForward (REConcat (x:xs)) = concatRE [mergeForward x,mergeForward (REConcat xs)] mergeForward (RERepeat r) = repeatRE (mergeForward r) mergeForward r = r firstRE :: RE a -> (RE a, RE a) firstRE (REConcat (x:xs)) = (x, REConcat xs) firstRE r = (r,epsilonRE) mapRE :: (a -> b) -> RE a -> RE b mapRE f = mapRE' (RESymbol . f) mapRE' :: (a -> RE b) -> RE a -> RE b mapRE' f (REConcat xs) = REConcat (map (mapRE' f) xs) mapRE' f (REUnion xs) = REUnion (map (mapRE' f) xs) mapRE' f (RERepeat x) = RERepeat (mapRE' f x) mapRE' f (RESymbol s) = f s joinRE :: RE (RE a) -> RE a joinRE (REConcat xs) = REConcat (map joinRE xs) joinRE (REUnion xs) = REUnion (map joinRE xs) joinRE (RERepeat xs) = RERepeat (joinRE xs) joinRE (RESymbol ss) = ss symbolsRE :: RE a -> [a] symbolsRE (REConcat xs) = concatMap symbolsRE xs symbolsRE (REUnion xs) = concatMap symbolsRE xs symbolsRE (RERepeat x) = symbolsRE x symbolsRE (RESymbol x) = [x] -- Debugging prRE :: (a -> String) -> RE a -> String prRE = prRE' 0 prRE' :: Int -> (a -> String) -> RE a -> String prRE' _ _ (REUnion []) = "" prRE' n f (REUnion xs) = p n 1 (concat (intersperse " | " (map (prRE' 1 f) xs))) prRE' n f (REConcat xs) = p n 2 (unwords (map (prRE' 2 f) xs)) prRE' n f (RERepeat x) = p n 3 (prRE' 3 f x) ++ "*" prRE' _ f (RESymbol s) = f s p n m s | n >= m = "(" ++ s ++ ")" | True = s