{-# LANGUAGE OverloadedStrings #-}
{- | A module implementing regexp-based split and substitute.

>>> substituteCompile "(a+)" "lapin" "'\\1'"
Right "l'a'pin"

>>> splitCompile "\\d" "a1b2c3"
Right ["a","b","c"]
-}
module Text.Regex.PCRE.ByteString.Utils
    ( -- * Perl-like utility functions
      substitute
    , split
    , substituteCompile
    , splitCompile
    -- * Re-exports from "Text.Regex.PCRE.ByteString"
    , Regex
    , CompOption
    , ExecOption
    , compBlank
    , execBlank
    -- * Pure version of the functions, using 'unsafePerformIO'
    , compile'
    , execute'
    , substitute'
    , split'
    , substituteCompile'
    , splitCompile'
    ) where

import Text.Regex.PCRE.ByteString
import qualified Data.ByteString.Char8 as BS
import Control.Monad.Except
import Data.Attoparsec.ByteString.Char8 as A
import Control.Applicative
import Control.Monad
import Control.Monad.IO.Class
import Data.Char (digitToInt)
import System.IO.Unsafe
import qualified Data.Array as A

{-| Substitutes values matched by a `Regex`. References can be used.

It doesn't support anything else than global substitution for now ..

-}
substitute :: Regex             -- ^ The regular expression, taken from a call to `compile`
           -> BS.ByteString     -- ^ The source string
           -> BS.ByteString     -- ^ The replacement string
           -> IO (Either String BS.ByteString)
substitute :: Regex -> ByteString -> ByteString -> IO (Either String ByteString)
substitute Regex
regexp ByteString
srcstring ByteString
repla = forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT forall a b. (a -> b) -> a -> b
$ do
    [Replacement]
parsedReplacement <- case forall a. Parser a -> ByteString -> Either String a
parseOnly Parser [Replacement]
repparser ByteString
repla of
                             Right [Replacement]
y -> forall (m :: * -> *) a. Monad m => a -> m a
return [Replacement]
y
                             Left String
rr -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String
rr forall a. [a] -> [a] -> [a]
++ String
" when parsing the replacement string")
    [RegexpSplit ByteString]
matches <- Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
regexp ByteString
srcstring
    let applyReplacement :: RegexpSplit BS.ByteString -> BS.ByteString
        applyReplacement :: RegexpSplit ByteString -> ByteString
applyReplacement (Unmatched ByteString
x) = ByteString
x
        applyReplacement (Matched ByteString
captured [ByteString]
mcaptures) = ByteString -> [Replacement] -> [ByteString] -> ByteString
applyCaptures ByteString
captured [Replacement]
parsedReplacement [ByteString]
mcaptures
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! [ByteString] -> ByteString
BS.concat forall a b. (a -> b) -> a -> b
$! forall a b. (a -> b) -> [a] -> [b]
map RegexpSplit ByteString -> ByteString
applyReplacement [RegexpSplit ByteString]
matches

-- Transforms the parsed replacement and the vector of captured stuff into
-- the destination ByteString.
applyCaptures :: BS.ByteString -> [Replacement] -> [BS.ByteString] -> BS.ByteString
applyCaptures :: ByteString -> [Replacement] -> [ByteString] -> ByteString
applyCaptures ByteString
firstmatch [Replacement]
repl [ByteString]
mcaptures = [ByteString] -> ByteString
BS.concat (forall a b. (a -> b) -> [a] -> [b]
map Replacement -> ByteString
applyCaptures' [Replacement]
repl)
    where
        ncaptures :: MatchOffset
ncaptures = forall (t :: * -> *) a. Foldable t => t a -> MatchOffset
length [ByteString]
mcaptures
        applyCaptures' :: Replacement -> BS.ByteString
        applyCaptures' :: Replacement -> ByteString
applyCaptures' Replacement
WholeMatch = ByteString
firstmatch
        applyCaptures' (RawReplacement ByteString
r) = ByteString
r
        applyCaptures' (IndexedReplacement MatchOffset
idx)
            | MatchOffset
idx forall a. Ord a => a -> a -> Bool
> MatchOffset
ncaptures Bool -> Bool -> Bool
|| MatchOffset
idx forall a. Ord a => a -> a -> Bool
< MatchOffset
0 = ByteString
""
            | Bool
otherwise = [ByteString]
mcaptures forall a. [a] -> MatchOffset -> a
!! (MatchOffset
idx forall a. Num a => a -> a -> a
- MatchOffset
1)

-- | Splits strings, using a `Regex` as delimiter.
split :: Regex  -- ^ The regular expression, taken from a call to `compile`
      -> BS.ByteString -- ^ The source string
      -> IO (Either String [BS.ByteString])
split :: Regex -> ByteString -> IO (Either String [ByteString])
split Regex
regexp ByteString
srcstring = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ([ByteString] -> [ByteString]
removeEmptyLeft forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [RegexpSplit a] -> [a]
regexpUnmatched) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall e (m :: * -> *) a. ExceptT e m a -> m (Either e a)
runExceptT (Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
regexp ByteString
srcstring)
    where
        removeEmptyLeft :: [ByteString] -> [ByteString]
removeEmptyLeft = forall a. [a] -> [a]
reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
dropWhile ByteString -> Bool
BS.null forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse

-- | A pure version of 'substitute', using unsafePerformIO.
substitute' :: Regex             -- ^ The regular expression, taken from a call to `compile`
            -> BS.ByteString     -- ^ The source string
            -> BS.ByteString     -- ^ The replacement string
            -> Either String BS.ByteString
substitute' :: Regex -> ByteString -> ByteString -> Either String ByteString
substitute' Regex
regexp ByteString
srcstring ByteString
repla = forall a. IO a -> a
unsafePerformIO (Regex -> ByteString -> ByteString -> IO (Either String ByteString)
substitute Regex
regexp ByteString
srcstring ByteString
repla)

-- | A pure version of 'split', using unsafePerformIO.
split' :: Regex  -- ^ The regular expression, taken from a call to `compile`
       -> BS.ByteString -- ^ The source string
       -> Either String [BS.ByteString]
split' :: Regex -> ByteString -> Either String [ByteString]
split' Regex
regexp ByteString
srcstring = forall a. IO a -> a
unsafePerformIO (Regex -> ByteString -> IO (Either String [ByteString])
split Regex
regexp ByteString
srcstring)

data RegexpSplit a = Matched a [a]
                   | Unmatched a
                   deriving (MatchOffset -> RegexpSplit a -> ShowS
forall a. Show a => MatchOffset -> RegexpSplit a -> ShowS
forall a. Show a => [RegexpSplit a] -> ShowS
forall a. Show a => RegexpSplit a -> String
forall a.
(MatchOffset -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexpSplit a] -> ShowS
$cshowList :: forall a. Show a => [RegexpSplit a] -> ShowS
show :: RegexpSplit a -> String
$cshow :: forall a. Show a => RegexpSplit a -> String
showsPrec :: MatchOffset -> RegexpSplit a -> ShowS
$cshowsPrec :: forall a. Show a => MatchOffset -> RegexpSplit a -> ShowS
Show, RegexpSplit a -> RegexpSplit a -> Bool
forall a. Eq a => RegexpSplit a -> RegexpSplit a -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RegexpSplit a -> RegexpSplit a -> Bool
$c/= :: forall a. Eq a => RegexpSplit a -> RegexpSplit a -> Bool
== :: RegexpSplit a -> RegexpSplit a -> Bool
$c== :: forall a. Eq a => RegexpSplit a -> RegexpSplit a -> Bool
Eq, RegexpSplit a -> RegexpSplit a -> Bool
RegexpSplit a -> RegexpSplit a -> Ordering
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
forall {a}. Ord a => Eq (RegexpSplit a)
forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Ordering
forall a. Ord a => RegexpSplit a -> RegexpSplit a -> RegexpSplit a
min :: RegexpSplit a -> RegexpSplit a -> RegexpSplit a
$cmin :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> RegexpSplit a
max :: RegexpSplit a -> RegexpSplit a -> RegexpSplit a
$cmax :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> RegexpSplit a
>= :: RegexpSplit a -> RegexpSplit a -> Bool
$c>= :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
> :: RegexpSplit a -> RegexpSplit a -> Bool
$c> :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
<= :: RegexpSplit a -> RegexpSplit a -> Bool
$c<= :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
< :: RegexpSplit a -> RegexpSplit a -> Bool
$c< :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Bool
compare :: RegexpSplit a -> RegexpSplit a -> Ordering
$ccompare :: forall a. Ord a => RegexpSplit a -> RegexpSplit a -> Ordering
Ord)

instance Functor RegexpSplit where
    fmap :: forall a b. (a -> b) -> RegexpSplit a -> RegexpSplit b
fmap a -> b
f (Matched a
x [a]
y)   = forall a. a -> [a] -> RegexpSplit a
Matched (a -> b
f a
x) (forall a b. (a -> b) -> [a] -> [b]
map a -> b
f [a]
y)
    fmap a -> b
f (Unmatched a
x) = forall a. a -> RegexpSplit a
Unmatched (a -> b
f a
x)

regexpAll :: [RegexpSplit a] -> [a]
regexpAll :: forall a. [RegexpSplit a] -> [a]
regexpAll = forall a b. (a -> b) -> [a] -> [b]
map forall {a}. RegexpSplit a -> a
unreg
    where
        unreg :: RegexpSplit a -> a
unreg ( Matched a
x [a]
_ ) = a
x
        unreg ( Unmatched a
x ) = a
x

isMatched :: RegexpSplit a -> Bool
isMatched :: forall a. RegexpSplit a -> Bool
isMatched (Matched a
_ [a]
_) = Bool
True
isMatched RegexpSplit a
_ = Bool
False

regexpUnmatched :: [RegexpSplit a] -> [a]
regexpUnmatched :: forall a. [RegexpSplit a] -> [a]
regexpUnmatched = forall a. [RegexpSplit a] -> [a]
regexpAll forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. RegexpSplit a -> Bool
isMatched)

getMatches :: Regex -> BS.ByteString -> ExceptT String IO [RegexpSplit BS.ByteString]
getMatches :: Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
_ ByteString
"" = forall (m :: * -> *) a. Monad m => a -> m a
return []
getMatches Regex
creg ByteString
src = do
    Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
x <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ Regex
-> ByteString
-> IO
     (Either
        WrapError
        (Maybe (ByteString, ByteString, ByteString, [ByteString])))
regexec Regex
creg ByteString
src
    case Either
  WrapError
  (Maybe (ByteString, ByteString, ByteString, [ByteString]))
x of
        Left (ReturnCode
rcode, String
rerror) -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError (String
"Regexp application error: " forall a. [a] -> [a] -> [a]
++ String
rerror forall a. [a] -> [a] -> [a]
++ String
"(" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show ReturnCode
rcode forall a. [a] -> [a] -> [a]
++ String
")")
        Right Maybe (ByteString, ByteString, ByteString, [ByteString])
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return [forall a. a -> RegexpSplit a
Unmatched ByteString
src]

        -- Now this is a trick, I don't know exactly why this happens, but this happens with empty regexps. We are going to cheat here
        Right (Just (ByteString
"",ByteString
"",ByteString
rm,[ByteString]
_)) -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. (a -> b) -> [a] -> [b]
map (forall a. a -> RegexpSplit a
Unmatched forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString
BS.singleton) (ByteString -> String
BS.unpack ByteString
rm))

        Right (Just (ByteString
before,ByteString
current,ByteString
remaining,[ByteString]
captures)) -> do
            [RegexpSplit ByteString]
remain <- Regex -> ByteString -> ExceptT String IO [RegexpSplit ByteString]
getMatches Regex
creg ByteString
remaining
            forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> RegexpSplit a
Unmatched ByteString
before forall a. a -> [a] -> [a]
: forall a. a -> [a] -> RegexpSplit a
Matched ByteString
current [ByteString]
captures forall a. a -> [a] -> [a]
: [RegexpSplit ByteString]
remain)


data Replacement = RawReplacement BS.ByteString
                 | IndexedReplacement Int
                 | WholeMatch
                 deriving (MatchOffset -> Replacement -> ShowS
[Replacement] -> ShowS
Replacement -> String
forall a.
(MatchOffset -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Replacement] -> ShowS
$cshowList :: [Replacement] -> ShowS
show :: Replacement -> String
$cshow :: Replacement -> String
showsPrec :: MatchOffset -> Replacement -> ShowS
$cshowsPrec :: MatchOffset -> Replacement -> ShowS
Show)

repparser :: Parser [Replacement]
repparser :: Parser [Replacement]
repparser = forall (f :: * -> *) a. Alternative f => f a -> f [a]
many Parser Replacement
replacement forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput

replacement :: Parser Replacement
replacement :: Parser Replacement
replacement = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ByteString -> Replacement
RawReplacement Parser ByteString
rawData forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Replacement
escapedThing

rawData :: Parser BS.ByteString
rawData :: Parser ByteString
rawData = (Char -> Bool) -> Parser ByteString
takeWhile1 (forall a. Eq a => a -> a -> Bool
/= Char
'\\')

escapedThing :: Parser Replacement
escapedThing :: Parser Replacement
escapedThing = do
    forall (f :: * -> *) a. Functor f => f a -> f ()
void (Char -> Parser Char
char Char
'\\')
    let ac :: Parser ByteString
ac = do
            Char
n <- Parser Char
anyChar
            ByteString
r <- Parser ByteString
rawData
            forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Char -> ByteString -> ByteString
BS.cons Char
n ByteString
r
        toReplacement :: MatchOffset -> Replacement
toReplacement MatchOffset
0 = Replacement
WholeMatch
        toReplacement MatchOffset
n = MatchOffset -> Replacement
IndexedReplacement MatchOffset
n
    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (MatchOffset -> Replacement
toReplacement forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> MatchOffset
digitToInt) Parser Char
digit forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (ByteString -> Replacement
RawReplacement forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ByteString -> ByteString
BS.cons Char
'\\') Parser ByteString
ac

-- | Compiles the regular expression (using default options) and `substitute`s
substituteCompile :: BS.ByteString     -- ^ The regular expression
                  -> BS.ByteString     -- ^ The source string
                  -> BS.ByteString     -- ^ The replacement string
                  -> IO (Either String BS.ByteString)
substituteCompile :: ByteString
-> ByteString -> ByteString -> IO (Either String ByteString)
substituteCompile ByteString
regexp ByteString
srcstring ByteString
repla = do
    Either (MatchOffset, String) Regex
re <- CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
compile CompOption
compBlank ExecOption
execBlank ByteString
regexp
    case Either (MatchOffset, String) Regex
re of
        Right Regex
cre -> Regex -> ByteString -> ByteString -> IO (Either String ByteString)
substitute Regex
cre ByteString
srcstring ByteString
repla
        Left (MatchOffset, String)
rr   -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"Regexp compilation failed: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (MatchOffset, String)
rr

-- | Compiles the regular expression (using default options) and `split`s.
splitCompile :: BS.ByteString -- ^ The regular expression
             -> BS.ByteString -- ^ The source string
             -> IO (Either String [BS.ByteString])
splitCompile :: ByteString -> ByteString -> IO (Either String [ByteString])
splitCompile ByteString
regexp ByteString
srcstring = do
    Either (MatchOffset, String) Regex
re <- CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
compile CompOption
compBlank ExecOption
execBlank ByteString
regexp
    case Either (MatchOffset, String) Regex
re of
        Right Regex
cre -> Regex -> ByteString -> IO (Either String [ByteString])
split Regex
cre ByteString
srcstring
        Left (MatchOffset, String)
rr   -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ String
"Regexp compilation failed: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (MatchOffset, String)
rr

-- | A pure version of 'compile', using unsafePerformIO.
compile' :: CompOption -> ExecOption -> BS.ByteString -> Either (MatchOffset, String) Regex
compile' :: CompOption
-> ExecOption -> ByteString -> Either (MatchOffset, String) Regex
compile' CompOption
co ExecOption
eo ByteString
s = forall a. IO a -> a
unsafePerformIO (CompOption
-> ExecOption
-> ByteString
-> IO (Either (MatchOffset, String) Regex)
compile CompOption
co ExecOption
eo ByteString
s)

-- | A pure version of 'execute', using unsafePerformIO.
execute' :: Regex -> BS.ByteString -> Either WrapError (Maybe (A.Array Int (MatchOffset, MatchLength)))
execute' :: Regex
-> ByteString
-> Either
     WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset)))
execute' Regex
r ByteString
s = forall a. IO a -> a
unsafePerformIO (Regex
-> ByteString
-> IO
     (Either
        WrapError (Maybe (Array MatchOffset (MatchOffset, MatchOffset))))
execute Regex
r ByteString
s)

-- | A pure version of 'substituteCompile', using unsafePerformIO.
substituteCompile' :: BS.ByteString     -- ^ The regular expression
                   -> BS.ByteString     -- ^ The source string
                   -> BS.ByteString     -- ^ The replacement string
                   -> Either String BS.ByteString
substituteCompile' :: ByteString -> ByteString -> ByteString -> Either String ByteString
substituteCompile' ByteString
regexp ByteString
srcstring ByteString
repla = forall a. IO a -> a
unsafePerformIO (ByteString
-> ByteString -> ByteString -> IO (Either String ByteString)
substituteCompile ByteString
regexp ByteString
srcstring ByteString
repla)

-- | A pure version of 'splitCompile', using unsafePerformIO.
splitCompile' :: BS.ByteString -- ^ The regular expression
              -> BS.ByteString -- ^ The source string
              -> Either String [BS.ByteString]
splitCompile' :: ByteString -> ByteString -> Either String [ByteString]
splitCompile' ByteString
regexp ByteString
srcstring = forall a. IO a -> a
unsafePerformIO (ByteString -> ByteString -> IO (Either String [ByteString])
splitCompile ByteString
regexp ByteString
srcstring)