{-# LANGUAGE OverloadedStrings #-}
module Text.Regex.PCRE.ByteString.Utils
(
substitute
, split
, substituteCompile
, splitCompile
, Regex
, CompOption
, ExecOption
, compBlank
, execBlank
, compile'
, execute'
, substitute'
, split'
, substituteCompile'
, splitCompile'
) where
import Text.Regex.PCRE.ByteString
import qualified Data.ByteString.Char8 as BS
import Control.Monad.Except
import Data.Attoparsec.ByteString.Char8 as A
import Control.Applicative
import Control.Monad
import Control.Monad.IO.Class
import Data.Char (digitToInt)
import System.IO.Unsafe
import qualified Data.Array as A
substitute :: Regex
-> BS.ByteString
-> BS.ByteString
-> IO (Either String BS.ByteString)
substitute :: Regex -> ByteString -> ByteString -> IO (Either String ByteString)
substitute Regex
regexp ByteString
srcstring ByteString
repla = forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT forall a b. (a -> b) -> a -> b
$ do
[Replacement]
parsedReplacement <- case forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Replacement]
repparser ByteString
repla of
Right [Replacement]
y -> forall (m :: * -> *) a. Monad m => a -> m a
return [Replacement]
y
Left String
rr -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String
rr forall a. [a] -> [a] -> [a]
++ String
" when parsing the replacement string")
[RegexpSplit ByteString]
matches <- Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
regexp ByteString
srcstring
let applyReplacement :: RegexpSplit BS.ByteString -> BS.ByteString
applyReplacement :: RegexpSplit ByteString -> ByteString
applyReplacement (Unmatched ByteString
x) = ByteString
x
applyReplacement (Matched ByteString
captured [ByteString]
mcaptures) = ByteString -> [Replacement] -> [ByteString] -> ByteString
applyCaptures ByteString
captured [Replacement]
parsedReplacement [ByteString]
mcaptures
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! [ByteString] -> ByteString
BS.concat forall a b. (a -> b) -> a -> b
$! forall a b. (a -> b) -> [a] -> [b]
map RegexpSplit ByteString -> ByteString
applyReplacement [RegexpSplit ByteString]
matches
applyCaptures :: BS.ByteString -> [Replacement] -> [BS.ByteString] -> BS.ByteString
applyCaptures :: ByteString -> [Replacement] -> [ByteString] -> ByteString
applyCaptures ByteString
firstmatch [Replacement]
repl [ByteString]
mcaptures = [ByteString] -> ByteString
BS.concat (forall a b. (a -> b) -> [a] -> [b]
map Replacement -> ByteString
applyCaptures' [Replacement]
repl)
where
ncaptures :: MatchOffset
ncaptures = forall (t :: * -> *) a. Foldable t => t a -> MatchOffset
length [ByteString]
mcaptures
applyCaptures' :: Replacement -> BS.ByteString
applyCaptures' :: Replacement -> ByteString
applyCaptures' Replacement
WholeMatch = ByteString
firstmatch
applyCaptures' (RawReplacement ByteString
r) = ByteString
r
applyCaptures' (IndexedReplacement MatchOffset
idx)
| MatchOffset
idx forall a. Ord a => a -> a -> Bool
> MatchOffset
ncaptures Bool -> Bool -> Bool
|| MatchOffset
idx forall a. Ord a => a -> a -> Bool
< MatchOffset
0 = ByteString
""
| Bool
otherwise = [ByteString]
mcaptures forall a. [a] -> MatchOffset -> a
!! (MatchOffset
idx forall a. Num a => a -> a -> a
- MatchOffset
1)
split :: Regex
-> BS.ByteString
-> IO (Either String [BS.ByteString])
split :: Regex -> ByteString -> IO (Either String [ByteString])
split Regex
regexp ByteString
srcstring = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([ByteString] -> [ByteString]
removeEmptyLeft forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [RegexpSplit a] -> [a]
regexpUnmatched) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT (Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
regexp ByteString
srcstring)
where
removeEmptyLeft :: [ByteString] -> [ByteString]
removeEmptyLeft = forall a. [a] -> [a]
reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
dropWhile ByteString -> Bool
BS.null forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse
substitute' :: Regex
-> BS.ByteString
-> BS.ByteString
-> Either String BS.ByteString
substitute' :: Regex -> ByteString -> ByteString -> Either String ByteString
substitute' Regex
regexp ByteString
srcstring ByteString
repla = forall a. IO a -> a
unsafePerformIO (Regex -> ByteString -> ByteString -> IO (Either String ByteString)
substitute Regex
regexp ByteString
srcstring ByteString
repla)
split' :: Regex
-> BS.ByteString
-> Either String [BS.ByteString]
split' :: Regex -> ByteString -> Either String [ByteString]
split' Regex
regexp ByteString
srcstring = forall a. IO a -> a
unsafePerformIO (Regex -> ByteString -> IO (Either String [ByteString])
split Regex
regexp ByteString
srcstring)
data RegexpSplit a = Matched a [a]
| Unmatched a
deriving (MatchOffset -> RegexpSplit a -> ShowS
forall a. Show a => MatchOffset -> RegexpSplit a -> ShowS
forall a. Show a => [RegexpSplit a] -> ShowS
forall a. Show a => RegexpSplit a -> String
forall a.
(MatchOffset -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexpSplit a] -> ShowS
$cshowList :: forall a. Show a => [RegexpSplit a] -> ShowS
show :: RegexpSplit a -> String
$cshow :: forall a. Show a => RegexpSplit a -> String
showsPrec :: MatchOffset -> RegexpSplit a -> ShowS
$cshowsPrec :: forall a. Show a => MatchOffset -> RegexpSplit a -> ShowS
Show, RegexpSplit a -> RegexpSplit a -> Bool
forall a. Eq a => RegexpSplit a -> RegexpSplit a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RegexpSplit a -> RegexpSplit a -> Bool
$c/= :: forall a. Eq a => RegexpSplit a -> RegexpSplit a -> Bool
== :: RegexpSplit a -> RegexpSplit a -> Bool
$c== :: forall a. Eq a => RegexpSplit a -> RegexpSplit a -> Bool
Eq, RegexpSplit a -> RegexpSplit a -> Bool
RegexpSplit a -> RegexpSplit a -> Ordering
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall {a}. Ord a => Eq (RegexpSplit a)
forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Ordering
forall a. Ord a => RegexpSplit a -> RegexpSplit a -> RegexpSplit a
min :: RegexpSplit a -> RegexpSplit a -> RegexpSplit a
$cmin :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> RegexpSplit a
max :: RegexpSplit a -> RegexpSplit a -> RegexpSplit a
$cmax :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> RegexpSplit a
>= :: RegexpSplit a -> RegexpSplit a -> Bool
$c>= :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
> :: RegexpSplit a -> RegexpSplit a -> Bool
$c> :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
<= :: RegexpSplit a -> RegexpSplit a -> Bool
$c<= :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
< :: RegexpSplit a -> RegexpSplit a -> Bool
$c< :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
compare :: RegexpSplit a -> RegexpSplit a -> Ordering
$ccompare :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Ordering
Ord)
instance Functor RegexpSplit where
fmap :: forall a b. (a -> b) -> RegexpSplit a -> RegexpSplit b
fmap a -> b
f (Matched a
x [a]
y) = forall a. a -> [a] -> RegexpSplit a
Matched (a -> b
f a
x) (forall a b. (a -> b) -> [a] -> [b]
map a -> b
f [a]
y)
fmap a -> b
f (Unmatched a
x) = forall a. a -> RegexpSplit a
Unmatched (a -> b
f a
x)
regexpAll :: [RegexpSplit a] -> [a]
regexpAll :: forall a. [RegexpSplit a] -> [a]
regexpAll = forall a b. (a -> b) -> [a] -> [b]
map forall {a}. RegexpSplit a -> a
unreg
where
unreg :: RegexpSplit a -> a
unreg ( Matched a
x [a]
_ ) = a
x
unreg ( Unmatched a
x ) = a
x
isMatched :: RegexpSplit a -> Bool
isMatched :: forall a. RegexpSplit a -> Bool
isMatched (Matched a
_ [a]
_) = Bool
True
isMatched RegexpSplit a
_ = Bool
False
regexpUnmatched :: [RegexpSplit a] -> [a]
regexpUnmatched :: forall a. [RegexpSplit a] -> [a]
regexpUnmatched = forall a. [RegexpSplit a] -> [a]
regexpAll forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. RegexpSplit a -> Bool
isMatched)
getMatches :: Regex -> BS.ByteString -> ExceptT String IO [RegexpSplit BS.ByteString]
getMatches :: Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
_ ByteString
"" = forall (m :: * -> *) a. Monad m => a -> m a
return []
getMatches Regex
creg ByteString
src = do
Either
WrapError
(Maybe (ByteString, ByteString, ByteString, [ByteString]))
x <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ Regex
-> ByteString
-> IO
(Either
WrapError
(Maybe (ByteString, ByteString, ByteString, [ByteString])))
regexec Regex
creg ByteString
src
case Either
WrapError
(Maybe (ByteString, ByteString, ByteString, [ByteString]))
x of
Left (ReturnCode
rcode, String
rerror) -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String
"Regexp application error: " forall a. [a] -> [a] -> [a]
++ String
rerror forall a. [a] -> [a] -> [a]
++ String
"(" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show ReturnCode
rcode forall a. [a] -> [a] -> [a]
++ String
")")
Right Maybe (ByteString, ByteString, ByteString, [ByteString])
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return [forall a. a -> RegexpSplit a
Unmatched ByteString
src]
Right (Just (ByteString
"",ByteString
"",ByteString
rm,[ByteString]
_)) -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. (a -> b) -> [a] -> [b]
map (forall a. a -> RegexpSplit a
Unmatched forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString
BS.singleton) (ByteString -> String
BS.unpack ByteString
rm))
Right (Just (ByteString
before,ByteString
current,ByteString
remaining,[ByteString]
captures)) -> do
[RegexpSplit ByteString]
remain <- Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
creg ByteString
remaining
forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> RegexpSplit a
Unmatched ByteString
before forall a. a -> [a] -> [a]
: forall a. a -> [a] -> RegexpSplit a
Matched ByteString
current [ByteString]
captures forall a. a -> [a] -> [a]
: [RegexpSplit ByteString]
remain)
data Replacement = RawReplacement BS.ByteString
| IndexedReplacement Int
| WholeMatch
deriving (MatchOffset -> Replacement -> ShowS
[Replacement] -> ShowS
Replacement -> String
forall a.
(MatchOffset -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Replacement] -> ShowS
$cshowList :: [Replacement] -> ShowS
show :: Replacement -> String
$cshow :: Replacement -> String
showsPrec :: MatchOffset -> Replacement -> ShowS
$cshowsPrec :: MatchOffset -> Replacement -> ShowS
Show)
repparser :: Parser [Replacement]
repparser :: Parser [Replacement]
repparser = forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Replacement
replacement forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput
replacement :: Parser Replacement
replacement :: Parser Replacement
replacement = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> Replacement
RawReplacement Parser ByteString
rawData forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Replacement
escapedThing
rawData :: Parser BS.ByteString
rawData :: Parser ByteString
rawData = (Char -> Bool) -> Parser ByteString
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
'\\')
escapedThing :: Parser Replacement
escapedThing :: Parser Replacement
escapedThing = do
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser Char
char Char
'\\')
let ac :: Parser ByteString
ac = do
Char
n <- Parser Char
anyChar
ByteString
r <- Parser ByteString
rawData
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> ByteString -> ByteString
BS.cons Char
n ByteString
r
toReplacement :: MatchOffset -> Replacement
toReplacement MatchOffset
0 = Replacement
WholeMatch
toReplacement MatchOffset
n = MatchOffset -> Replacement
IndexedReplacement MatchOffset
n
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (MatchOffset -> Replacement
toReplacement forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> MatchOffset
digitToInt) Parser Char
digit forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (ByteString -> Replacement
RawReplacement forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString -> ByteString
BS.cons Char
'\\') Parser ByteString
ac
substituteCompile :: BS.ByteString
-> BS.ByteString
-> BS.ByteString
-> IO (Either String BS.ByteString)
substituteCompile :: ByteString
-> ByteString -> ByteString -> IO (Either String ByteString)
substituteCompile ByteString
regexp ByteString
srcstring ByteString
repla = do
Either (MatchOffset, String) Regex
re <- CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
compile CompOption
compBlank ExecOption
execBlank ByteString
regexp
case Either (MatchOffset, String) Regex
re of
Right Regex
cre -> Regex -> ByteString -> ByteString -> IO (Either String ByteString)
substitute Regex
cre ByteString
srcstring ByteString
repla
Left (MatchOffset, String)
rr -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"Regexp compilation failed: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (MatchOffset, String)
rr
splitCompile :: BS.ByteString
-> BS.ByteString
-> IO (Either String [BS.ByteString])
splitCompile :: ByteString -> ByteString -> IO (Either String [ByteString])
splitCompile ByteString
regexp ByteString
srcstring = do
Either (MatchOffset, String) Regex
re <- CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
compile CompOption
compBlank ExecOption
execBlank ByteString
regexp
case Either (MatchOffset, String) Regex
re of
Right Regex
cre -> Regex -> ByteString -> IO (Either String [ByteString])
split Regex
cre ByteString
srcstring
Left (MatchOffset, String)
rr -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"Regexp compilation failed: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (MatchOffset, String)
rr
compile' :: CompOption -> ExecOption -> BS.ByteString -> Either (MatchOffset, String) Regex
compile' :: CompOption
-> ExecOption -> ByteString -> Either (MatchOffset, String) Regex
compile' CompOption
co ExecOption
eo ByteString
s = forall a. IO a -> a
unsafePerformIO (CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
compile CompOption
co ExecOption
eo ByteString
s)
execute' :: Regex -> BS.ByteString -> Either WrapError (Maybe (A.Array Int (MatchOffset, MatchLength)))
execute' :: Regex
-> ByteString
-> Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
execute' Regex
r ByteString
s = forall a. IO a -> a
unsafePerformIO (Regex
-> ByteString
-> IO
(Either
WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
execute Regex
r ByteString
s)
substituteCompile' :: BS.ByteString
-> BS.ByteString
-> BS.ByteString
-> Either String BS.ByteString
substituteCompile' :: ByteString -> ByteString -> ByteString -> Either String ByteString
substituteCompile' ByteString
regexp ByteString
srcstring ByteString
repla = forall a. IO a -> a
unsafePerformIO (ByteString
-> ByteString -> ByteString -> IO (Either String ByteString)
substituteCompile ByteString
regexp ByteString
srcstring ByteString
repla)
splitCompile' :: BS.ByteString
-> BS.ByteString
-> Either String [BS.ByteString]
splitCompile' :: ByteString -> ByteString -> Either String [ByteString]
splitCompile' ByteString
regexp ByteString
srcstring = forall a. IO a -> a
unsafePerformIO (ByteString -> ByteString -> IO (Either String [ByteString])
splitCompile ByteString
regexp ByteString
srcstring)