-------------------------------------------------------------------- -- | -- Module : Text.Regex.Applicative.Text -- Copyright : (c) 2015 Oleg Grenrus -- License : BSD3 -- -- Maintainer: Oleg Grenrus -- Stability : experimental -- -- @Text.Regex.Applicative@ API specialised to 'Char' and 'Text'. -------------------------------------------------------------------- {-# LANGUAGE CPP #-} #if __GLASGOW_HASKELL__ >=704 {-# LANGUAGE Safe #-} #endif module Text.Regex.Applicative.Text ( -- * Types RE' , R.RE -- * Smart constructors , sym , psym , msym , anySym , string , reFoldl , R.Greediness(..) , few , withMatched -- * Basic matchers , match , (=~) , replace -- * Advanced matchers , findFirstPrefix , findLongestPrefix , findShortestPrefix , findFirstInfix , findLongestInfix , findShortestInfix -- * Module re-exports , module Control.Applicative ) where import Control.Applicative import Control.Arrow import Data.Monoid (mappend) import Data.Text (Text) import qualified Data.Text as T import qualified Text.Regex.Applicative as R -- | Convenience alias for 'RE' working (also) on 'Text'. type RE' a = R.RE Char a -- | Match and return a single 'Char' which satisfies the predicate psym :: (Char -> Bool) -> RE' Char psym = R.psym -- | Like 'psym', but allows to return a computed value instead of the -- original symbol msym :: (Char -> Maybe a) -> RE' a msym = R.msym -- | Match and return the given symbol sym :: Char -> RE' Char sym = R.sym -- | Match and return any single symbol anySym :: RE' Char anySym = R.anySym -- | Match and return the given 'Text'. -- -- -- > import Text.Regex.Applicative -- > -- > number = string "one" *> pure 1 <|> string "two" *> pure 2 -- > -- > main = print $ "two" =~ number string :: Text -> RE' Text string = fmap T.pack . R.string . T.unpack -- | Match zero or more instances of the given expression, which are combined using -- the given folding function. -- -- 'Greediness' argument controls whether this regular expression should match -- as many as possible ('Greedy') or as few as possible ('NonGreedy') instances -- of the underlying expression. reFoldl :: R.Greediness -> (b -> a -> b) -> b -> RE' a -> RE' b reFoldl = R.reFoldl -- | Match zero or more instances of the given expression, but as -- few of them as possible (i.e. /non-greedily/). A greedy equivalent of 'few' -- is 'many'.x -- -- > >>> findFirstPrefix (few anySym <* "b") "ababab" -- > Just ("a","abab") -- > >>> findFirstPrefix (many anySym <* "b") "ababab" -- > Just ("ababa","") few :: RE' a -> RE' [a] few = R.few -- | Return matched symbols as part of the return value withMatched :: RE' a -> RE' (a, Text) withMatched = fmap (second T.pack) . R.withMatched -- | @s =~ a = match a s@ (=~) :: Text -> RE' a -> Maybe a (=~) = flip match infix 2 =~ -- | Attempt to match a 'Text' against the regular expression. -- Note that the whole string (not just some part of it) should be matched. -- -- > >>> match (sym 'a' <|> sym 'b') "a" -- > Just 'a' -- > >>> match (sym 'a' <|> sym 'b') "ab" -- > Nothing -- match :: RE' a -> Text -> Maybe a match = reTextF R.match -- | Find a string prefix which is matched by the regular expression. -- -- Of all matching prefixes, pick one using left bias (prefer the left part of -- '<|>' to the right part) and greediness. -- -- This is the match which a backtracking engine (such as Perl's one) would find -- first. -- -- If match is found, the rest of the input is also returned. -- -- > >>> findFirstPrefix ("a" <|> "ab") "abc" -- > Just ("a","bc") -- > >>> findFirstPrefix ("ab" <|> "a") "abc" -- > Just ("ab","c") -- > >>> findFirstPrefix "bc" "abc" -- > Nothing findFirstPrefix :: RE' a -> Text -> Maybe (a, Text) findFirstPrefix = fmap pairF .: reTextF R.findFirstPrefix -- | Find the longest string prefix which is matched by the regular expression. -- -- Submatches are still determined using left bias and greediness, so this is -- different from POSIX semantics. -- -- If match is found, the rest of the input is also returned. -- -- -- > >>> let keyword = "if" -- > >>> let identifier = many $ psym isAlpha -- > >>> let lexeme = (Left <$> keyword) <|> (Right <$> identifier) -- > >>> findLongestPrefix lexeme "if foo" -- > Just (Left "if"," foo") -- > >>> findLongestPrefix lexeme "iffoo" -- > Just (Right "iffoo","") findLongestPrefix :: RE' a -> Text -> Maybe (a, Text) findLongestPrefix = fmap pairF .: reTextF R.findLongestPrefix -- | Find the shortest prefix (analogous to 'findLongestPrefix') findShortestPrefix :: RE' a -> Text -> Maybe (a, Text) findShortestPrefix = fmap pairF .: reTextF R.findShortestPrefix -- | Find the leftmost substring that is matched by the regular expression. -- Otherwise behaves like 'findFirstPrefix'. Returns the result together with -- the prefix and suffix of the string surrounding the match. findFirstInfix :: RE' a -> Text -> Maybe (Text, a, Text) findFirstInfix = fmap tripleF .: reTextF R.findFirstInfix -- | Find the leftmost substring that is matched by the regular expression. -- Otherwise behaves like 'findLongestPrefix'. Returns the result together with -- the prefix and suffix of the string surrounding the match. findLongestInfix :: RE' a -> Text -> Maybe (Text, a, Text) findLongestInfix = fmap tripleF .: reTextF R.findLongestInfix -- | Find the leftmost substring that is matched by the regular expression. -- Otherwise behaves like 'findShortestPrefix'. Returns the result together with -- the prefix and suffix of the string surrounding the match. findShortestInfix :: RE' a -> Text -> Maybe (Text, a, Text) findShortestInfix = fmap tripleF .: reTextF R.findShortestInfix -- | Replace matches of regular expression with it's value. -- -- > >>> replace ("!" <$ sym 'f' <* some (sym 'o')) "quuxfoofooooofoobarfobar" -- > "quux!!!bar!bar" replace :: RE' Text -> Text -> Text replace r = go . T.unpack where go :: String -> Text go [] = T.empty go ys@(x:xs) = case R.findLongestPrefix r ys of Nothing -> T.cons x (go xs) Just (prefix, rest) -> prefix `mappend` go rest -- Helpers reTextF :: (a -> String -> b) -> (a -> Text -> b) reTextF f a s = f a (T.unpack s) {- INLINE reTextF -} pairF :: (a, String) -> (a, Text) pairF (x, y) = (x, T.pack y) {-# INLINE pairF #-} tripleF :: (String, a, String) -> (Text, a, Text) tripleF (x, y, z) = (T.pack x, y, T.pack z) {-# INLINE tripleF #-} (.:) :: (c -> d) -> (a -> b -> c) -> (a -> b -> d) f .: g = \a b -> f (g a b) {-# INLINE (.:) #-}