-- ------------------------------------------------------------ {- | Module : Text.Regex.XMLSchema.String.Regex Copyright : Copyright (C) 2010 - Uwe Schmidt License : MIT Maintainer : Uwe Schmidt Stability : stable Portability: portable W3C XML Schema Regular Expression Matcher Grammar can be found under -} -- ------------------------------------------------------------ module Text.Regex.XMLSchema.String.Regex ( Regex , GenRegex , mkZero , mkUnit , mkSym , mkSym1 , mkSymRng , mkWord , mkDot , mkStar , mkAll , mkAlt , mkElse , mkSeq , mkSeqs , mkRep , mkRng , mkOpt , mkDiff , mkIsect , mkExor , mkInterleave , mkCompl , mkBr , isZero , errRegex , nullable , nullable' , delta1 , delta , firstChars , matchWithRegex , matchWithRegex' , splitWithRegex , splitWithRegex' , splitWithRegexCS , splitWithRegexCS' ) where import Data.List -- ( intercalate ) import Data.Set.CharSet -- ------------------------------------------------------------ data GenRegex l = Zero String | Unit | Sym CharSet | Dot | Star (GenRegex l) | Alt (GenRegex l) (GenRegex l) | Else (GenRegex l) (GenRegex l) | Seq (GenRegex l) (GenRegex l) | Rep Int (GenRegex l) -- 1 or more repetitions | Rng Int Int (GenRegex l) -- n..m repetitions | Diff (GenRegex l) (GenRegex l) -- r1 - r2 | Isec (GenRegex l) (GenRegex l) -- r1 n r2 | Exor (GenRegex l) (GenRegex l) -- r1 xor r2 | Intl (GenRegex l) (GenRegex l) -- r1 interleavedWith r2 | Br (Label l) (GenRegex l) String -- currently parsed (...) | Cbr (GenRegex l) [(Label l, String)] --already completely parsed (...) deriving (Eq, Ord {-, Show -}) type Regex = GenRegex String type Label l = Maybe l -- we need one special label for the whole expression -- see splitWithRegex type Nullable l = (Bool, [(Label l, String)]) -- ------------------------------------------------------------ {- just for documentation class Inv a where inv :: a -> Bool instance Inv (GenRegex l) where inv (Zero _) = True inv Unit = True inv (Sym p) = not (nulCS p) && not (fullCS p) inv Dot = True inv (Star e) = inv e inv (Alt e1 e2) = inv e1 && inv e2 inv (Seq e1 e2) = inv e1 && inv e2 inv (Rep i e) = i > 0 && inv e inv (Rng i j e) = (i < j || (i == j && i > 1)) && inv e inv (Diff e1 e2) = inv e1 && inv e2 inv (Isec e1 e2) = inv e1 && inv e2 inv (Exor e1 e2) = inv e1 && inv e2 -} -- ------------------------------------------------------------ -- -- smart constructors -- | construct the r.e. for the empty set. -- An (error-) message may be attached mkZero :: String -> GenRegex l mkZero = Zero {-# INLINE mkZero #-} -- | construct the r.e. for the set containing the empty word mkUnit :: GenRegex l mkUnit = Unit {-# INLINE mkUnit #-} -- | construct the r.e. for a set of chars mkSym :: CharSet -> GenRegex l mkSym s | nullCS s = mkZero $ "empty char range" | fullCS s = mkDot | otherwise = Sym s {-# INLINE mkSym #-} -- | construct an r.e. for a single char set mkSym1 :: Char -> GenRegex l mkSym1 = mkSym . singleCS {-# INLINE mkSym1 #-} -- | construct an r.e. for an intervall of chars mkSymRng :: Char -> Char -> GenRegex l mkSymRng c1 c2 = mkSym $ rangeCS c1 c2 {-# INLINE mkSymRng #-} -- | mkSym generaized for strings mkWord :: [Char] -> GenRegex l mkWord = mkSeqs . map mkSym1 -- | construct an r.e. for the set of all Unicode chars mkDot :: GenRegex l mkDot = Dot {-# INLINE mkDot #-} -- | construct an r.e. for the set of all Unicode words mkAll :: Eq l => GenRegex l mkAll = mkStar mkDot {-# INLINE mkAll #-} -- | construct r.e. for r* mkStar :: Eq l => GenRegex l -> GenRegex l mkStar (Zero _) = mkUnit -- {}* == () mkStar e@Unit = e -- ()* == () mkStar e@(Star _e1) = e -- (r*)* == r* mkStar (Rep 1 e1) = mkStar e1 -- (r+)* == r* mkStar (Rep i e1) | i == 1 || nullable e1 = mkStar e1 -- (r{i,})* == r* when i == 1 or nullable r mkStar e@(Rng _ _ e1) | nullable e = mkStar e1 -- (r{i,j})* == r* when i == 0 or nullable r mkStar e@(Alt _ _) = Star (rmStar e) -- (a*|b)* == (a|b)* {- this is wrong, not generally applicable mkStar (Br l r s) = mkBr0 l (mkStar r) s -- ({l}r)* == ({l}r*) because we want the longest match as result for the subexpression -} mkStar e = Star e rmStar :: Eq l => GenRegex l -> GenRegex l rmStar (Alt e1 e2) = mkAlt (rmStar e1) (rmStar e2) rmStar (Star e1) = rmStar e1 rmStar (Rep 1 e1) = rmStar e1 rmStar e1 = e1 -- | construct the r.e for r1|r2 mkAlt :: Eq l => GenRegex l -> GenRegex l -> GenRegex l mkAlt e1 (Zero _) = e1 -- e1 u {} = e1 mkAlt (Zero _) e2 = e2 -- {} u e2 = e2 mkAlt (Sym p1) (Sym p2) = mkSym $ p1 `unionCS` p2 -- melting of predicates mkAlt e1 e2@(Sym _) = mkAlt e2 e1 -- symmetry: predicates always first mkAlt e1@(Sym _) (Alt e2@(Sym _) e3) = mkAlt (mkAlt e1 e2) e3 -- prepare melting of predicates mkAlt (Sym _) e2@Dot = e2 -- c|. = . for a c's mkAlt e1@(Star Dot) _e2 = e1 -- A* u e1 = A* mkAlt _e1 e2@(Star Dot) = e2 -- e1 u A* = A* mkAlt (Alt e1 e2) e3 = mkAlt e1 (mkAlt e2 e3) -- associativity mkAlt e1 e2 | e1 == e2 = e1 | otherwise = Alt e1 e2 -- | construct the r.e. for r1{|}r2 (r1 orElse r2). -- -- This represents the same r.e. as r1|r2, but when -- collecting the results of subexpressions in (...) and r1 succeeds, the -- subexpressions of r2 are discarded, so r1 matches are prioritized -- -- example -- -- > splitSubex "({1}x)|({2}.)" "x" = ([("1","x"),("2","x")], "") -- > -- > splitSubex "({1}x){|}({2}.)" "x" = ([("1","x")], "") mkElse :: Eq l => GenRegex l -> GenRegex l -> GenRegex l mkElse e1 (Zero _) = e1 -- e1 u {} = e1 mkElse (Zero _) e2 = e2 -- {} u e2 = e2 mkElse (Sym p1) (Sym p2) = mkSym $ p1 `unionCS` p2 -- melting of predicates -- no symmetry allowed mkElse e1@(Sym _) (Else e2@(Sym _) e3) = mkElse (mkElse e1 e2) e3 -- prepare melting of predicates mkElse (Sym _) e2@Dot = e2 -- c|. = . for a c's mkElse e1@(Star Dot) _e2 = e1 -- A* u e1 = A* mkElse _e1 e2@(Star Dot) = e2 -- e1 u A* = A* mkElse (Else e1 e2) e3 = mkElse e1 (mkElse e2 e3) -- associativity mkElse e1 e2 | e1 == e2 = e1 | otherwise = Else e1 e2 -- | Construct the sequence r.e. r1.r2 mkSeq :: GenRegex l -> GenRegex l -> GenRegex l mkSeq e1@(Zero _) _e2 = e1 mkSeq _e1 e2@(Zero _) = e2 mkSeq Unit e2 = e2 mkSeq (Cbr e1 ss1) e2 = mkCbr (mkSeq e1 e2) ss1 -- move finished submatches upwards mkSeq e1 Unit = e1 mkSeq (Seq e1 e2) e3 = mkSeq e1 (mkSeq e2 e3) mkSeq e1 e2 = Seq e1 e2 -- | mkSeq extened to lists mkSeqs :: [GenRegex l] -> GenRegex l mkSeqs = foldr mkSeq mkUnit -- | Construct repetition r{i,} mkRep :: Eq l => Int -> GenRegex l -> GenRegex l mkRep 0 e = mkStar e mkRep _ e@(Zero _) = e mkRep _ e | nullable e = mkStar e mkRep i (Rep j e) = mkRep (i * j) e mkRep i e = Rep i e -- | Construct range r{i,j} mkRng :: Int -> Int -> GenRegex l -> GenRegex l mkRng 0 0 _e = mkUnit mkRng 1 1 e = e mkRng lb ub _e | lb > ub = Zero $ "illegal range " ++ show lb ++ ".." ++ show ub mkRng _l _u e@(Zero _) = e mkRng _l _u e@Unit = e mkRng lb ub e = Rng lb ub e -- | Construct option r? mkOpt :: GenRegex l -> GenRegex l mkOpt = mkRng 0 1 {-# INLINE mkOpt #-} -- | Construct difference r.e.: r1 {\\} r2 -- -- example -- -- > match "[a-z]+{\\}bush" "obama" = True -- > match "[a-z]+{\\}bush" "clinton" = True -- > match "[a-z]+{\\}bush" "bush" = False -- not important any more mkDiff :: Eq l => GenRegex l -> GenRegex l -> GenRegex l mkDiff e1@(Zero _) _e2 = e1 -- {} - r2 = {} mkDiff e1 (Zero _) = e1 -- r1 - {} = r1 mkDiff _e1 (Star Dot) = mkZero "empty set in difference expr" -- r1 - .* = {} mkDiff Dot (Sym p) = mkSym $ compCS p -- . - s = ~s mkDiff (Sym _) Dot = mkZero "empty set in difference expr" -- x - . = {} mkDiff (Sym p1) (Sym p2) = mkSym $ p1 `diffCS` p2 -- set diff mkDiff e1 e2 | e1 == e2 = mkZero "empty set in difference expr" -- r1 - r1 = {} | otherwise = Diff e1 e2 -- | Construct the Complement of an r.e.: whole set of words - r mkCompl :: Eq l => GenRegex l -> GenRegex l mkCompl (Zero _) = mkAll mkCompl (Star Dot) = mkZero "empty set in compl expr" mkCompl e = mkDiff (mkStar mkDot) e -- | Construct r.e. for intersection: r1 {&} r2 -- -- example -- -- > match ".*a.*{&}.*b.*" "-a-b-" = True -- > match ".*a.*{&}.*b.*" "-b-a-" = True -- > match ".*a.*{&}.*b.*" "-a-a-" = False -- > match ".*a.*{&}.*b.*" "---b-" = False mkIsect :: Eq l => GenRegex l -> GenRegex l -> GenRegex l mkIsect e1@(Zero _) _e2 = e1 -- {} n r2 = {} mkIsect _e1 e2@(Zero _) = e2 -- r1 n {} = {} mkIsect e1@(Unit) e2 -- () n r2 = () if nullable r2 | nullable e2 = e1 -- () n r2 = {} if not nullable r2 | otherwise = mkZero "intersection empty" mkIsect e1 e2@(Unit) = mkIsect e2 e1 -- symmetric version of las 2 laws mkIsect (Sym p1) (Sym p2) = mkSym $ p1 `intersectCS` p2 -- intersect sets mkIsect e1@(Sym _) Dot = e1 -- x n . = x mkIsect Dot e2@(Sym _) = e2 -- . n x = x mkIsect (Star Dot) e2 = e2 -- .* n r2 = r2 mkIsect e1 (Star Dot) = e1 -- r1 n .* = r1 mkIsect e1 e2 | e1 == e2 = e1 -- r1 n r1 = r1 | otherwise = Isec e1 e2 -- | Construct r.e. for exclusive or: r1 {^} r2 -- -- example -- -- > match "[a-c]+{^}[c-d]+" "abc" = True -- > match "[a-c]+{^}[c-d]+" "acdc" = False -- > match "[a-c]+{^}[c-d]+" "ccc" = False -- > match "[a-c]+{^}[c-d]+" "cdc" = True mkExor :: Eq l => GenRegex l -> GenRegex l -> GenRegex l mkExor (Zero _) e2 = e2 mkExor e1 (Zero _) = e1 mkExor (Star Dot) _e2 = mkZero "empty set in exor expr" mkExor _e1 (Star Dot) = mkZero "empty set in exor expr" mkExor (Sym p1) (Sym p2) = mkSym $ p1 `exorCS` p2 mkExor (Sym p1) Dot = mkSym $ compCS p1 mkExor Dot (Sym p2) = mkSym $ compCS p2 mkExor e1 e2 | e1 == e2 = mkZero "empty set in exor expr" -- r1 xor r1 = {} | otherwise = Exor e1 e2 mkInterleave :: GenRegex l -> GenRegex l -> GenRegex l mkInterleave e1@(Zero _) _ = e1 mkInterleave _ e2@(Zero _) = e2 mkInterleave (Unit) e2 = e2 mkInterleave e1 (Unit) = e1 mkInterleave e1 e2 = Intl e1 e2 mkBr0 :: Label l -> GenRegex l -> String -> GenRegex l mkBr0 _ e@(Zero _) _ = e mkBr0 l Unit s = mkCbr mkUnit [(l,reverse s)] mkBr0 l e s = Br l e s -- | Construct a labeled subexpression: ({label}r) mkBr :: l -> GenRegex l -> GenRegex l mkBr l e = mkBr0 (Just l) e "" mkBr' :: GenRegex l -> GenRegex l mkBr' e = mkBr0 Nothing e "" mkCbr :: GenRegex l -> [(Label l, String)] -> GenRegex l mkCbr e@(Zero _) _ = e -- dead end, throw away subexpr matches mkCbr (Cbr e ss1) ss = mkCbr e (ss ++ ss1) -- join inner and this subexpr match mkCbr e ss = Cbr e ss -- ------------------------------------------------------------ instance Show l => Show (GenRegex l) where show (Zero e) = "{" ++ e ++ "}" show Unit = "()" show (Sym p) | p == compCS (stringCS "\n\r") = "." | null (tail cs) && rng1 (head cs) = escRng . head $ cs | otherwise = "[" ++ concat cs' ++ "]" where rng1 (x,y) = x == y cs = p -- charRngs . chars $ p cs' = map escRng p escRng (x, y) | x == y = esc x | succ x == y = esc x ++ esc y | otherwise = esc x ++ "-" ++ esc y esc x | x `elem` "\\-[]{}()*+?.^" = '\\':x:"" | x >= ' ' && x <= '~' = x:"" | otherwise = "&#" ++ show (fromEnum x) ++ ";" show Dot = "\\a" show (Star Dot) = "\\A" show (Star e) = "(" ++ show e ++ "*)" show (Alt e1 e2) = "(" ++ show e1 ++ "|" ++ show e2 ++ ")" show (Else e1 e2) = "(" ++ show e1 ++ "{|}" ++ show e2 ++ ")" show (Seq e1 e2) = "(" ++ show e1 ++ show e2 ++ ")" show (Rep 1 e) = "(" ++ show e ++ "+)" show (Rep i e) = "(" ++ show e ++ "{" ++ show i ++ ",})" show (Rng 0 1 e) = "(" ++ show e ++ "?)" show (Rng i j e) = "(" ++ show e ++ "{" ++ show i ++ "," ++ show j ++ "})" show (Diff e1 e2) = "(" ++ show e1 ++ "{\\}" ++ show e2 ++ ")" show (Isec e1 e2) = "(" ++ show e1 ++ "{&}" ++ show e2 ++ ")" show (Exor e1 e2) = "(" ++ show e1 ++ "{^}" ++ show e2 ++ ")" show (Intl e1 e2) = "(" ++ show e1 ++ "{:}" ++ show e2 ++ ")" show (Br l e s) = "({" ++ showL l ++ (if null s then "" else "=" ++ reverse s ) ++ "}" ++ show e ++ ")" show (Cbr e ss) = "([" ++ intercalate "," (map (\(l,s) -> showL l ++ "=" ++ s) ss) ++ "]" ++ show e ++ ")" showL :: Show l => Label l -> String showL = rmq . maybe "" show where rmq ('\"':xs) = init xs rmq xs = xs -- ------------------------------------------------------------ isZero :: GenRegex l -> Bool isZero (Zero _) = True isZero _ = False {-# INLINE isZero #-} errRegex :: GenRegex l -> String errRegex (Zero e) = e errRegex _ = "" -- ------------------------------------------------------------ nullable :: GenRegex l -> Bool nullable = fst . nullable' {-# INLINE nullable #-} nullable' :: GenRegex l -> Nullable l nullable' (Zero _) = (False, []) nullable' Unit = (True, []) nullable' Dot = (False, []) nullable' (Sym _x) = (False, []) nullable' (Star _e) = (True, []) nullable' (Rep _i e) = nullable' e nullable' (Rng i _ e) = (i == 0, []) `unionN` nullable' e nullable' (Seq e1 e2) = nullable' e1 `isectN` nullable' e2 nullable' (Alt e1 e2) = nullable' e1 `unionN` nullable' e2 nullable' (Else e1 e2) = nullable' e1 `orElseN` nullable' e2 nullable' (Isec e1 e2) = nullable' e1 `isectN` nullable' e2 nullable' (Diff e1 e2) = nullable' e1 `diffN` nullable' e2 nullable' (Exor e1 e2) = nullable' e1 `exorN` nullable' e2 nullable' (Intl e1 e2) = nullable' e1 `isectN` nullable' e2 nullable' (Br l e s) = (True, [(l, reverse s)]) `isectN` nullable' e nullable' (Cbr e ss) = (True, ss) `isectN` nullable' e isectN :: Nullable l -> Nullable l -> Nullable l isectN (True, ws1) (True, ws2) = (True, ws1 ++ ws2) isectN _ _ = (False, []) unionN :: Nullable l -> Nullable l -> Nullable l unionN (False, _) (False, _) = (False, []) unionN (_, ws1) (_, ws2) = (True, ws1 ++ ws2) orElseN :: Nullable l -> Nullable l -> Nullable l orElseN e1@(True, _ws1) _ = e1 orElseN _ e2 = e2 diffN :: Nullable l -> Nullable l -> Nullable l diffN n1 (False, _) = n1 diffN _ _ = (False, []) exorN :: Nullable l -> Nullable l -> Nullable l exorN n1@(True, _) (False, _) = n1 exorN (False, _) n2@(True, _) = n2 exorN _ _ = (False, []) -- ------------------------------------------------------------ -- | FIRST for regular expressions -- -- this is only an approximation, the real set of char may be smaller, -- when the expression contains intersection, set difference or exor operators firstChars :: GenRegex l -> CharSet firstChars (Zero _) = emptyCS firstChars Unit = emptyCS firstChars (Sym p) = p firstChars Dot = allCS firstChars (Star e1) = firstChars e1 firstChars (Alt e1 e2) = firstChars e1 `unionCS` firstChars e2 firstChars (Else e1 e2) = firstChars e1 `unionCS` firstChars e2 firstChars (Seq e1 e2) | nullable e1 = firstChars e1 `unionCS` firstChars e2 | otherwise = firstChars e1 firstChars (Rep _i e) = firstChars e firstChars (Rng _i _j e) = firstChars e firstChars (Diff e1 _e2) = firstChars e1 -- this is an approximation firstChars (Isec e1 e2) = firstChars e1 `intersectCS` firstChars e2 -- this is an approximation firstChars (Exor e1 e2) = firstChars e1 `unionCS` firstChars e2 -- this is an approximation firstChars (Intl e1 e2) = firstChars e1 `unionCS` firstChars e2 firstChars (Br _l e _s) = firstChars e firstChars (Cbr e _ss) = firstChars e -- ------------------------------------------------------------ delta1 :: Eq l => GenRegex l -> Char -> GenRegex l delta1 e@(Zero _) _ = e delta1 Unit c = mkZero $ "unexpected char " ++ show c delta1 (Sym p) c | c `elemCS` p = mkUnit | otherwise = mkZero $ "unexpected char " ++ show c delta1 Dot _ = mkUnit delta1 e@(Star Dot) _ = e delta1 e@(Star e1) c = mkSeq (delta1 e1 c) e delta1 (Alt e1 e2) c = mkAlt (delta1 e1 c) (delta1 e2 c) delta1 (Else e1 e2) c = mkElse (delta1 e1 c) (delta1 e2 c) delta1 (Seq e1@(Br l e1' s) e2) c | n = mkAlt (mkSeq (delta1 e1 c) e2) (mkCbr (delta1 e2 c) ((l, reverse s) : ws)) where (n, ws) = nullable' e1' delta1 (Seq e1 e2) c | nullable e1 = mkAlt (mkSeq (delta1 e1 c) e2) (delta1 e2 c) | otherwise = mkSeq (delta1 e1 c) e2 delta1 (Rep i e) c = mkSeq (delta1 e c) (mkRep (i-1) e) delta1 (Rng i j e) c = mkSeq (delta1 e c) (mkRng ((i-1) `max` 0) (j-1) e) delta1 (Diff e1 e2) c = mkDiff (delta1 e1 c) (delta1 e2 c) delta1 (Isec e1 e2) c = mkIsect (delta1 e1 c) (delta1 e2 c) delta1 (Exor e1 e2) c = mkExor (delta1 e1 c) (delta1 e2 c) delta1 (Intl e1 e2) c = mkAlt (mkInterleave (delta1 e1 c) e2 ) (mkInterleave e1 (delta1 e2 c)) delta1 (Br l e s) c = mkBr0 l (delta1 e c) (c:s) delta1 (Cbr e ss) c = mkCbr (delta1 e c) ss -- ------------------------------------------------------------ delta :: Eq l => GenRegex l -> String -> GenRegex l delta e [] = e delta e@(Zero _) _xs = e delta e@(Star Dot) _xs = e delta e (x : xs) = delta (delta1 e x) xs matchWithRegex :: Eq l => GenRegex l -> String -> Bool matchWithRegex e = nullable . delta e matchWithRegex' :: Eq l => GenRegex l -> String -> Maybe [(Label l,String)] matchWithRegex' e = (\ (r, l) -> if r then Just l else Nothing) . nullable' . delta e -- ------------------------------------------------------------ -- | This function wraps the whole regex in a subexpression before starting -- the parse. This is done for getting acces to -- the whole parsed string. Therfore we need one special label, this label -- is the Nothing value, all explicit labels are Just labels. splitWithRegex :: Eq l => GenRegex l -> String -> Maybe ([(Label l,String)], String) splitWithRegex re inp = do (re', rest) <- splitWithRegex' (mkBr' re) inp return ( snd . nullable' $ re', rest) splitWithRegexCS :: Eq l => GenRegex l -> CharSet -> String -> Maybe ([(Label l,String)], String) splitWithRegexCS re cs inp = do (re', rest) <- splitWithRegexCS' (mkBr' re) cs inp return ( snd . nullable' $ re', rest) -- ---------------------------------------- -- -- | The main scanner function {- linear recursive function, can lead to stack overflow splitWithRegex' :: Eq l => GenRegex l -> String -> Maybe (GenRegex l, String) splitWithRegex' re "" | nullable re = Just (re, "") | otherwise = Nothing splitWithRegex' re inp@(c : inp') | isZero re = Nothing | otherwise = evalRes . splitWithRegex' (delta1 re c) $ inp' where evalRes Nothing | nullable re = Just (re, inp) | otherwise = Nothing evalRes res = res -} -- tail recursive version of above function splitWithRegex' :: Eq l => GenRegex l -> String -> Maybe (GenRegex l, String) splitWithRegex' re inp = splitWithRegex'' ( if nullable re then Just (re, inp) -- first possible result: empty prefix else Nothing -- empty prefix not a result ) re inp splitWithRegex'' :: Eq l => Maybe (GenRegex l, String) -> GenRegex l -> String -> Maybe (GenRegex l, String) splitWithRegex'' lastRes _re "" = lastRes splitWithRegex'' lastRes re (c : inp') | isZero re = lastRes | otherwise = splitWithRegex'' nextRes re' $ inp' where re' = delta1 re c nextRes | nullable re' = Just (re', inp') | otherwise = lastRes -- ---------------------------------------- -- -- | speedup version for splitWithRegex' -- -- This function checks whether the input starts with a char from FIRST re. -- If this is not the case, the split fails. The FIRST set can be computed once -- for a whole tokenizer and reused by every call of split splitWithRegexCS' :: Eq l => GenRegex l -> CharSet -> String -> Maybe (GenRegex l, String) splitWithRegexCS' re cs inp@(c : _) | c `elemCS` cs = splitWithRegex' re inp splitWithRegexCS' re _cs inp | nullable re = Just (re, inp) | otherwise = Nothing -- ------------------------------------------------------------