>
> module Text.Regex.Deriv.Translate
> ( translate, translatePosix ) where
> import Control.Monad.State
> import Data.Char (chr)
> import qualified Data.IntMap as IM
> import Text.Regex.Deriv.ExtPattern
> import Text.Regex.Deriv.IntPattern
> import Text.Regex.Deriv.RE
> import Text.Regex.Deriv.Common
>
> data TState = TState { ngi :: NGI
> , gi :: GI
> , anchorStart :: Bool
> , anchorEnd :: Bool
> , posix :: Bool
> , posix_binder :: IM.IntMap ()
> }
> deriving Show
>
> initTState = TState { ngi = 3, gi = 1, anchorStart = False, anchorEnd = False, posix = False, posix_binder = IM.empty }
> initTStatePosix = TState { ngi = 3, gi = 1, anchorStart = False, anchorEnd = False, posix = True, posix_binder = IM.empty }
> type NGI = Int
> type GI = Int
getters and putters
> getNGI :: State TState NGI
> getNGI = do { st <- get
> ; return $ ngi st
> }
> getIncNGI :: State TState NGI
> getIncNGI = do { st <- get
> ; let i = ngi st
> ; put st{ngi=(i1)}
> ; return i
> }
> getGI :: State TState GI
> getGI = do { st <- get
> ; return $ gi st
> }
> getIncGI :: State TState GI
> getIncGI = do { st <- get
> ; let i = gi st
> ; put st{gi=(i+1)}
> ; return i
> }
> getAnchorStart :: State TState Bool
> getAnchorStart = do { st <- get
> ; return (anchorStart st)
> }
> setAnchorStart :: State TState ()
> setAnchorStart = do { st <- get
> ; put st{anchorStart=True}
> }
> getAnchorEnd :: State TState Bool
> getAnchorEnd = do { st <- get
> ; return (anchorEnd st)
> }
> setAnchorEnd :: State TState ()
> setAnchorEnd = do { st <- get
> ; put st{anchorEnd=True}
> }
> isPosix :: State TState Bool
> isPosix = do { st <- get
> ; return (posix st)
> }
> isPosixBinder :: GI -> State TState Bool
> isPosixBinder gi = do { st <- get
> ; return (IM.member gi (posix_binder st))
> }
> addPosixBinder :: Int -> State TState ()
> addPosixBinder i = do { st <- get
> ; let bs = posix_binder st
> bs' = IM.insert i () bs
> ; put st{posix_binder=bs'}
> }
>
> translate :: EPat -> Pat
> translate epat = case runState (trans epat) initTState of
> (pat, state) ->
> let hasAnchorS = anchorStart state
> hasAnchorE = anchorEnd state
> in case (hasAnchorS, hasAnchorE) of
> (True, True) -> PVar mainBinder [] pat
> (True, False) -> PPair (PVar mainBinder [] pat) (PVar subBinder [] (PE [(Star Any NotGreedy)]))
> (False, True) -> PPair (PVar preBinder [] (PE [(Star Any NotGreedy)])) (PVar mainBinder [] pat)
>
>
> (False, False) -> (PPair (PVar preBinder [] (PE [(Star Any NotGreedy)])) (PVar preBinder_ [] (PPair (PVar mainBinder [] pat) (PVar subBinder [] (PE [(Star Any NotGreedy)])))))
>
> translatePosix :: EPat -> (Pat,IM.IntMap ())
> translatePosix epat = case runState (trans epat) initTStatePosix of
> (pat, state) ->
> let hasAnchorS = anchorStart state
> hasAnchorE = anchorEnd state
> posixBnd = posix_binder state
> in case (hasAnchorS, hasAnchorE) of
> (True, True) -> (PVar mainBinder [] pat, posixBnd)
> (True, False) -> (PPair (PVar mainBinder [] pat) (PVar subBinder [] (PE [(Star Any NotGreedy)])), posixBnd)
> (False, True) -> (PPair (PVar preBinder [] (PE [(Star Any NotGreedy)])) (PVar mainBinder [] pat), posixBnd)
>
>
> (False, False) -> ((PPair (PVar preBinder [] (PE [(Star Any NotGreedy)])) (PVar preBinder_ [] (PPair (PVar mainBinder [] pat) (PVar subBinder [] (PE [(Star Any NotGreedy)]))))), posixBnd)
>
> trans :: EPat -> State TState Pat
> trans epat =
> do { is_posix <- isPosix
> ; if is_posix && isStructural epat
> then do
> { gi <- getIncGI
> ; ipat <- trans' epat
> ; if isPVar ipat
> then do
> { return ipat
> }
> else do
> { addPosixBinder gi
> ; return (PVar gi [] ipat)
> }
> }
> else trans' epat
> }
> where isStructural :: EPat -> Bool
> isStructural (EOr _) = True
> isStructural (EConcat _) = True
> isStructural (EOpt _ _) = True
> isStructural (EPlus _ _) = True
> isStructural (EStar _ _) = True
> isStructural _ = False
> isPVar :: Pat -> Bool
> isPVar (PVar _ _ _) = True
> isPVar _ = False
> trans' :: EPat -> State TState Pat
> trans' epat
> | hasGroup epat = p_trans epat
> | otherwise = do
> { r <- r_trans epat
> ; return (PE [r])
> }
>
>
> adhocSimp :: Pat -> State TState Pat
> adhocSimp q@(PChoice [ (PVar x _ p) ] g) = do
> { b <- isPosixBinder x
> ; if b
> then return p
> else return q }
> adhocSimp q = return q
>
> p_trans :: EPat -> State TState Pat
> p_trans epat =
> case epat of
>
> { EEmpty ->
> do { return ( PE [Empty] )
> }
>
> ; EGroup e ->
> do { i <- getIncGI
> ;
> ; p <- trans' e
> ; p' <- adhocSimp p
> ; return ( PVar i [] p')
> }
>
> ; EGroupNonMarking e ->
> trans' e
> ; EOr es ->
>
> do { ps <- mapM trans es
> ; case ps of
> { (p':ps') ->
> return (PChoice ps Greedy )
> ; [] -> error "an empty choice enountered."
> }
> }
> ; EConcat es ->
>
> do { ps <- mapM trans es
> ; case reverse ps of
> { (p':ps') ->
> return (foldl (\xs x -> PPair x xs) p' ps')
> ; [] -> error "an empty sequence enountered."
> }
> }
> ; EOpt e b ->
>
> do { p <- trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> ; return (PChoice [p,PE [Empty]] g)
> }
> ; EPlus e b ->
>
> do { p <- trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> ; return (PPair p (PStar p g))
> }
> ; EStar e b ->
>
> do { p <- trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> ; return (PStar p g)
> }
> ; EBound e low (Just high) b ->
>
> do { r <- r_trans e
> ; i <- getIncNGI
> ; let g | b = Greedy
> | otherwise = NotGreedy
> r1s = take low $ repeat r
> r1 = case r1s of
> { [] -> Empty
> ; (r':rs') -> foldl (\ rs r -> Seq rs r) r' rs'
> }
> r2s = take (high low) $ repeat (Choice [r,Empty] g)
> r2 = case r2s of
> { [] -> Empty
> ; (r':rs') -> foldl (\ rs r -> Seq rs r) r' rs'
> }
> r3 = case (r1,r2) of
> (Empty, Empty) -> Empty
> (Empty, _ ) -> r2
> (_ , Empty) -> r1
> (_ , _ ) -> Seq r1 r2
> p = PVar i [] (PE [r3])
> ; return p
> }
> ; EBound e low Nothing b ->
>
> do { r <- r_trans e
> ; i <- getIncNGI
> ; let g | b = Greedy
> | otherwise = NotGreedy
> r1s = take low $ repeat r
> r1 = case r1s of
> { [] -> Empty
> ; (r':rs') -> foldl (\ rs r -> Seq rs r) r' rs'
> }
> r2 = Seq r1 (Star r g)
> p = PVar i [] (PE [r2])
> ; return p
> }
> ; ECarat ->
>
>
>
> do { f <- getAnchorStart
> ; if f
> then do { i <- getIncNGI
> ; let r = L '^'
> p = PVar i [] (PE [r])
> ; return p
> }
> else do { setAnchorStart
> ; i <- getIncNGI
> ; let r = Empty
> p = PVar i [] (PE [r])
> ; return p
> }
> }
> ; EDollar ->
>
>
> do { f <- getAnchorEnd
> ; if f
> then return ()
> else setAnchorEnd
> ; i <- getIncNGI
> ; let r = Empty
> p = PVar i [] (PE [r])
> ; return p
> }
> ; EDot ->
>
>
> do { i <- getIncNGI
> ; let r = Any
> p = PVar i [] (PE [r])
> ; return p
> }
> ; EAny cs ->
>
>
> do { i <- getIncNGI
> ; let r = char_list_to_re cs
>
> p = PVar i [] (PE [r])
> ; return p
> }
> ; ENoneOf cs ->
>
>
> do { i <- getIncNGI
> ; let
> r = Not cs
> p = PVar i [] (PE [r])
> ; return p
> }
> ; EEscape c ->
>
>
> do { i <- getIncNGI
> ; let p = PVar i [] (PE [L c])
> ; return p
> }
> ; EChar c ->
>
>
> do { i <- getIncNGI
> ; let p = PVar i [] (PE [L c])
> ; return p
> }
> }
> char_list_to_re (c:cs) = Choice (map L (c:cs)) Greedy
> char_list_to_re [] = error "char_list_to_re expects non-empty list"
> alphas = char_list_to_re (['a'..'z'] ++ ['A'..'Z'])
> digits = char_list_to_re ['0'..'9']
> sigma = map chr [0 .. 255]
> anychar = char_list_to_re sigma
e ~>_r r
> r_trans :: EPat -> State TState RE
> r_trans e =
> case e of
> { EEmpty ->
>
> return Empty
> ; EGroup e ->
>
> r_trans e
> ; EGroupNonMarking e ->
>
> r_trans e
> ; EOr es ->
>
> do { rs <- mapM r_trans es
> ; case rs of
> { [] -> return Phi
> ; (r:rs) -> return (Choice (r:rs) Greedy)
> }
> }
> ; EConcat es ->
>
> do { rs <- mapM r_trans es
> ; case rs of
> { [] -> return Empty
> ; (r:rs) -> return (foldl (\ xs x -> Seq xs x) r rs)
> }
> }
> ; EOpt e b ->
>
> do { r <- r_trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> ; return (Choice [r,Empty] g)
> }
> ; EPlus e b ->
>
> do { r <- r_trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> ; return (Seq r (Star r g))
> }
> ; EStar e b ->
>
> do { r <- r_trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> ; return (Star r g)
> }
> ; EBound e low (Just high) b ->
>
> do { r <- r_trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> r1s = take low $ repeat r
> r1 = case r1s of
> { [] -> Empty
> ; (r':rs') -> foldl (\ rs r -> Seq rs r) r' rs'
> }
> r2s = take (high low) $ repeat (Choice [r,Empty] g)
> r2 = case r2s of
> { [] -> Empty
> ; (r':rs') -> foldl (\ rs r -> Seq rs r) r' rs'
> }
> r3 = case (r1,r2) of
> (Empty, Empty) -> Empty
> (Empty, _ ) -> r2
> (_ , Empty) -> r1
> (_ , _ ) -> Seq r1 r2
> ; return r3
> }
> ; EBound e low Nothing b ->
>
> do { r <- r_trans e
> ; let g | b = Greedy
> | otherwise = NotGreedy
> r1s = take low $ repeat r
> r1 = case r1s of
> { [] -> Empty
> ; (r':rs') -> foldl (\ rs r -> Seq rs r) r' rs'
> }
> r2 = Seq r1 (Star r g)
> ; return r2
> }
> ; ECarat ->
>
>
>
> do { f <- getAnchorStart
> ; if f
> then return (L '^')
> else do { setAnchorStart
> ; return Empty
> }
> }
> ; EDollar ->
>
>
> do { f <- getAnchorEnd
> ; if f
> then return ()
> else setAnchorEnd
> ; return Empty
> }
> ; EDot ->
>
>
> return Any
> ; EAny cs ->
>
> return (char_list_to_re cs)
> ; ENoneOf cs ->
>
>
> return (Not cs)
> ; EEscape c ->
>
> return $ L c
> ; EChar c ->
>
> return $ L c
> }
>