{-|
Module      : Z.Data.Text.Regex
Description : RE2 regex
Copyright   : (c) Dong Han, 2017-2018
License     : BSD
Maintainer  : winterland1989@gmail.com
Stability   : experimental
Portability : non-portable

Binding to google's <https://github.com/google/re2 RE2>, microsoft did a nice job on RE2 regex syntaxs:
<https://docs.microsoft.com/en-us/deployedge/edge-learnmore-regex>. Note GHC string literals need @\\@ to
be escaped, e.g.

>>> match (regex "([a-z0-9_\\.-]+)@([\\da-z\\.-]+)\\.([a-z\\.]{2,6})") "please end email to hello@world.com, foo@bar.com"
>>> ("hello@world.com",[Just "hello",Just "world",Just "com"],", foo@bar.com")

-}

module Z.Data.Text.Regex
  ( -- * RE2 regex
    Regex, regex, RegexOpts(..), defaultRegexOpts, regexOpts
  , escape, regexCaptureNum, regexPattern
  , RegexException(..)
  -- * regex operations
  , test
  , match
  , replace
  , extract
  ) where

import Control.Exception
import Control.Monad
import Data.Int
import Data.Word
import GHC.Stack
import GHC.Generics
import Foreign.Marshal.Utils            (fromBool)
import System.IO.Unsafe
import qualified Z.Data.Text.Base       as T
import qualified Z.Data.Text.Print      as T
import qualified Z.Data.Vector.Base     as V
import qualified Z.Data.Array           as A
import Z.Foreign.CPtr
import Z.Foreign

-- | A compiled RE2 regex.
data Regex = Regex
    { Regex -> CPtr Regex
regexPtr        :: {-# UNPACK #-} !(CPtr Regex)
    , Regex -> Int
regexCaptureNum :: {-# UNPACK #-} !Int            -- ^ capturing group number(including @\\0@)
    , Regex -> Text
regexPattern    :: T.Text                         -- ^ Get back regex's pattern.
    } deriving (Int -> Regex -> ShowS
[Regex] -> ShowS
Regex -> String
(Int -> Regex -> ShowS)
-> (Regex -> String) -> ([Regex] -> ShowS) -> Show Regex
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Regex] -> ShowS
$cshowList :: [Regex] -> ShowS
show :: Regex -> String
$cshow :: Regex -> String
showsPrec :: Int -> Regex -> ShowS
$cshowsPrec :: Int -> Regex -> ShowS
Show, (forall x. Regex -> Rep Regex x)
-> (forall x. Rep Regex x -> Regex) -> Generic Regex
forall x. Rep Regex x -> Regex
forall x. Regex -> Rep Regex x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Regex x -> Regex
$cfrom :: forall x. Regex -> Rep Regex x
Generic)
      deriving anyclass Int -> Regex -> Builder ()
(Int -> Regex -> Builder ()) -> Print Regex
forall a. (Int -> a -> Builder ()) -> Print a
toUTF8BuilderP :: Int -> Regex -> Builder ()
$ctoUTF8BuilderP :: Int -> Regex -> Builder ()
T.Print

-- | RE2 Regex options.
--
-- The options are ('defaultRegexOpts' in parentheses):
--
-- @
--   posix_syntax     (false) restrict regexps to POSIX egrep syntax
--   longest_match    (false) search for longest match, not first match
--   log_errors       (true)  log syntax and execution errors to ERROR
--   max_mem          (8<<20)  approx. max memory footprint of RE2
--   literal          (false) interpret string as literal, not regexp
--   never_nl         (false) never match \\n, even if it is in regexp
--   dot_nl           (false) dot matches everything including new line
--   never_capture    (false) parse all parens as non-capturing
--   case_sensitive   (true)  match is case-sensitive (regexp can override
--                              with (?i) unless in posix_syntax mode)
-- @
--
-- The following options are only consulted when posix_syntax == true.
-- When posix_syntax == false, these features are always enabled and
-- cannot be turned off; to perform multi-line matching in that case,
-- begin the regexp with @(?m)@.
--
-- @
--   perl_classes     (false) allow Perl's \\d \\s \\w \\D \\S \\W
--   word_boundary    (false) allow Perl's \\b \\B (word boundary and not)
--   one_line         (false) ^ and $ only match beginning and end of text
-- @
--
data RegexOpts = RegexOpts
    { RegexOpts -> Bool
posix_syntax   :: Bool
    , RegexOpts -> Bool
longest_match  :: Bool

    , RegexOpts -> Int64
max_mem        :: {-# UNPACK #-} !Int64
    , RegexOpts -> Bool
literal        :: Bool
    , RegexOpts -> Bool
never_nl       :: Bool
    , RegexOpts -> Bool
dot_nl         :: Bool
    , RegexOpts -> Bool
never_capture  :: Bool
    , RegexOpts -> Bool
case_sensitive :: Bool


    , RegexOpts -> Bool
perl_classes   :: Bool
    , RegexOpts -> Bool
word_boundary  :: Bool
    , RegexOpts -> Bool
one_line       :: Bool
    } deriving (RegexOpts -> RegexOpts -> Bool
(RegexOpts -> RegexOpts -> Bool)
-> (RegexOpts -> RegexOpts -> Bool) -> Eq RegexOpts
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RegexOpts -> RegexOpts -> Bool
$c/= :: RegexOpts -> RegexOpts -> Bool
== :: RegexOpts -> RegexOpts -> Bool
$c== :: RegexOpts -> RegexOpts -> Bool
Eq, Eq RegexOpts
Eq RegexOpts
-> (RegexOpts -> RegexOpts -> Ordering)
-> (RegexOpts -> RegexOpts -> Bool)
-> (RegexOpts -> RegexOpts -> Bool)
-> (RegexOpts -> RegexOpts -> Bool)
-> (RegexOpts -> RegexOpts -> Bool)
-> (RegexOpts -> RegexOpts -> RegexOpts)
-> (RegexOpts -> RegexOpts -> RegexOpts)
-> Ord RegexOpts
RegexOpts -> RegexOpts -> Bool
RegexOpts -> RegexOpts -> Ordering
RegexOpts -> RegexOpts -> RegexOpts
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: RegexOpts -> RegexOpts -> RegexOpts
$cmin :: RegexOpts -> RegexOpts -> RegexOpts
max :: RegexOpts -> RegexOpts -> RegexOpts
$cmax :: RegexOpts -> RegexOpts -> RegexOpts
>= :: RegexOpts -> RegexOpts -> Bool
$c>= :: RegexOpts -> RegexOpts -> Bool
> :: RegexOpts -> RegexOpts -> Bool
$c> :: RegexOpts -> RegexOpts -> Bool
<= :: RegexOpts -> RegexOpts -> Bool
$c<= :: RegexOpts -> RegexOpts -> Bool
< :: RegexOpts -> RegexOpts -> Bool
$c< :: RegexOpts -> RegexOpts -> Bool
compare :: RegexOpts -> RegexOpts -> Ordering
$ccompare :: RegexOpts -> RegexOpts -> Ordering
$cp1Ord :: Eq RegexOpts
Ord, Int -> RegexOpts -> ShowS
[RegexOpts] -> ShowS
RegexOpts -> String
(Int -> RegexOpts -> ShowS)
-> (RegexOpts -> String)
-> ([RegexOpts] -> ShowS)
-> Show RegexOpts
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexOpts] -> ShowS
$cshowList :: [RegexOpts] -> ShowS
show :: RegexOpts -> String
$cshow :: RegexOpts -> String
showsPrec :: Int -> RegexOpts -> ShowS
$cshowsPrec :: Int -> RegexOpts -> ShowS
Show, (forall x. RegexOpts -> Rep RegexOpts x)
-> (forall x. Rep RegexOpts x -> RegexOpts) -> Generic RegexOpts
forall x. Rep RegexOpts x -> RegexOpts
forall x. RegexOpts -> Rep RegexOpts x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep RegexOpts x -> RegexOpts
$cfrom :: forall x. RegexOpts -> Rep RegexOpts x
Generic)
      deriving anyclass Int -> RegexOpts -> Builder ()
(Int -> RegexOpts -> Builder ()) -> Print RegexOpts
forall a. (Int -> a -> Builder ()) -> Print a
toUTF8BuilderP :: Int -> RegexOpts -> Builder ()
$ctoUTF8BuilderP :: Int -> RegexOpts -> Builder ()
T.Print

-- | Default regex options, see 'RegexOpts'.
--
defaultRegexOpts :: RegexOpts
defaultRegexOpts :: RegexOpts
defaultRegexOpts = Bool
-> Bool
-> Int64
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> RegexOpts
RegexOpts
    Bool
False Bool
False Int64
hs_re2_kDefaultMaxMem
    Bool
False Bool
False Bool
False Bool
False
    Bool
True Bool
False Bool
False Bool
False

-- | Exception thrown when using regex.
data RegexException = InvalidRegexPattern T.Text CallStack deriving Int -> RegexException -> ShowS
[RegexException] -> ShowS
RegexException -> String
(Int -> RegexException -> ShowS)
-> (RegexException -> String)
-> ([RegexException] -> ShowS)
-> Show RegexException
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexException] -> ShowS
$cshowList :: [RegexException] -> ShowS
show :: RegexException -> String
$cshow :: RegexException -> String
showsPrec :: Int -> RegexException -> ShowS
$cshowsPrec :: Int -> RegexException -> ShowS
Show
instance Exception RegexException

-- | Compile a regex pattern, throw 'InvalidRegexPattern' in case of illegal patterns.
--
regex :: HasCallStack => T.Text -> Regex
{-# NOINLINE regex #-}
regex :: Text -> Regex
regex Text
t = IO Regex -> Regex
forall a. IO a -> a
unsafePerformIO (IO Regex -> Regex) -> IO Regex -> Regex
forall a b. (a -> b) -> a -> b
$ do
    (CPtr Regex
cp, Ptr Regex
r) <- (MutableByteArray# RealWorld -> IO (Ptr Regex))
-> FunPtr (Ptr Regex -> IO ()) -> IO (CPtr Regex, Ptr Regex)
forall r a b.
(MutableByteArray# RealWorld -> IO r)
-> FunPtr (Ptr a -> IO b) -> IO (CPtr a, r)
newCPtrUnsafe (\ MutableByteArray# RealWorld
mba# ->
        (PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (Ptr Regex)) -> IO (Ptr Regex)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> PrimVector Word8
T.getUTF8Bytes Text
t) (MutableByteArray# RealWorld
-> BA# Word8 -> Int -> Int -> IO (Ptr Regex)
hs_re2_compile_pattern_default MutableByteArray# RealWorld
mba#)))
        FunPtr (Ptr Regex -> IO ())
p_hs_re2_delete_pattern

    Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Ptr Regex
r Ptr Regex -> Ptr Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr Regex
forall a. Ptr a
nullPtr) (RegexException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t CallStack
HasCallStack => CallStack
callStack))
    CInt
ok <- Ptr Regex -> IO CInt
hs_re2_ok Ptr Regex
r
    Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (CInt
ok CInt -> CInt -> Bool
forall a. Eq a => a -> a -> Bool
== CInt
0) (RegexException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t CallStack
HasCallStack => CallStack
callStack))

    Int
n <- CPtr Regex -> (Ptr Regex -> IO Int) -> IO Int
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
cp Ptr Regex -> IO Int
hs_num_capture_groups
    Regex -> IO Regex
forall (m :: * -> *) a. Monad m => a -> m a
return (CPtr Regex -> Int -> Text -> Regex
Regex CPtr Regex
cp Int
n Text
t)

-- | Compile a regex pattern withOptions, throw 'InvalidRegexPattern' in case of illegal patterns.
regexOpts :: HasCallStack => RegexOpts -> T.Text -> Regex
{-# NOINLINE regexOpts #-}
regexOpts :: RegexOpts -> Text -> Regex
regexOpts RegexOpts{Bool
Int64
one_line :: Bool
word_boundary :: Bool
perl_classes :: Bool
case_sensitive :: Bool
never_capture :: Bool
dot_nl :: Bool
never_nl :: Bool
literal :: Bool
max_mem :: Int64
longest_match :: Bool
posix_syntax :: Bool
one_line :: RegexOpts -> Bool
word_boundary :: RegexOpts -> Bool
perl_classes :: RegexOpts -> Bool
case_sensitive :: RegexOpts -> Bool
never_capture :: RegexOpts -> Bool
dot_nl :: RegexOpts -> Bool
never_nl :: RegexOpts -> Bool
literal :: RegexOpts -> Bool
max_mem :: RegexOpts -> Int64
longest_match :: RegexOpts -> Bool
posix_syntax :: RegexOpts -> Bool
..} Text
t = IO Regex -> Regex
forall a. IO a -> a
unsafePerformIO (IO Regex -> Regex) -> IO Regex -> Regex
forall a b. (a -> b) -> a -> b
$ do
    (CPtr Regex
cp, Ptr Regex
r) <- (MutableByteArray# RealWorld -> IO (Ptr Regex))
-> FunPtr (Ptr Regex -> IO ()) -> IO (CPtr Regex, Ptr Regex)
forall r a b.
(MutableByteArray# RealWorld -> IO r)
-> FunPtr (Ptr a -> IO b) -> IO (CPtr a, r)
newCPtrUnsafe ( \ MutableByteArray# RealWorld
mba# ->
        (PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (Ptr Regex)) -> IO (Ptr Regex)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> PrimVector Word8
T.getUTF8Bytes Text
t) ((BA# Word8 -> Int -> Int -> IO (Ptr Regex)) -> IO (Ptr Regex))
-> (BA# Word8 -> Int -> Int -> IO (Ptr Regex)) -> IO (Ptr Regex)
forall a b. (a -> b) -> a -> b
$ \ BA# Word8
p Int
o Int
l ->
            MutableByteArray# RealWorld
-> BA# Word8
-> Int
-> Int
-> CBool
-> CBool
-> Int64
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> IO (Ptr Regex)
hs_re2_compile_pattern MutableByteArray# RealWorld
mba# BA# Word8
p Int
o Int
l
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
posix_syntax  )
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
longest_match )
                Int64
max_mem
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
literal       )
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
never_nl      )
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
dot_nl        )
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
never_capture )
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
case_sensitive)
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
perl_classes  )
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
word_boundary )
                (Bool -> CBool
forall a. Num a => Bool -> a
fromBool Bool
one_line      )))
        FunPtr (Ptr Regex -> IO ())
p_hs_re2_delete_pattern

    Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Ptr Regex
r Ptr Regex -> Ptr Regex -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr Regex
forall a. Ptr a
nullPtr) (RegexException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t CallStack
HasCallStack => CallStack
callStack))
    CInt
ok <- Ptr Regex -> IO CInt
hs_re2_ok Ptr Regex
r
    Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (CInt
ok CInt -> CInt -> Bool
forall a. Eq a => a -> a -> Bool
== CInt
0) (RegexException -> IO ()
forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t CallStack
HasCallStack => CallStack
callStack))

    Int
n <- CPtr Regex -> (Ptr Regex -> IO Int) -> IO Int
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
cp Ptr Regex -> IO Int
hs_num_capture_groups
    Regex -> IO Regex
forall (m :: * -> *) a. Monad m => a -> m a
return (CPtr Regex -> Int -> Text -> Regex
Regex CPtr Regex
cp Int
n Text
t)

-- | Escape a piece of text literal so that it can be safely used in regex pattern.
--
-- >>> escape "(\\d+)"
-- >>> "\\(\\\\d\\+\\)"
--
escape :: T.Text -> T.Text
{-# INLINABLE escape #-}
escape :: Text -> Text
escape Text
t = PrimVector Word8 -> Text
T.Text (PrimVector Word8 -> Text)
-> (IO (Ptr StdString) -> PrimVector Word8)
-> IO (Ptr StdString)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO (PrimVector Word8) -> PrimVector Word8
forall a. IO a -> a
unsafePerformIO (IO (PrimVector Word8) -> PrimVector Word8)
-> (IO (Ptr StdString) -> IO (PrimVector Word8))
-> IO (Ptr StdString)
-> PrimVector Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO (Ptr StdString) -> IO (PrimVector Word8)
fromStdString (IO (Ptr StdString) -> Text) -> IO (Ptr StdString) -> Text
forall a b. (a -> b) -> a -> b
$
    PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (Ptr StdString))
-> IO (Ptr StdString)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> PrimVector Word8
T.getUTF8Bytes Text
t) BA# Word8 -> Int -> Int -> IO (Ptr StdString)
hs_re2_quote_meta

-- | Check if text matched regex pattern.
test :: Regex -> T.Text -> Bool
{-# INLINABLE test #-}
test :: Regex -> Text -> Bool
test (Regex CPtr Regex
fp Int
_ Text
_) (T.Text PrimVector Word8
bs) = IO Bool -> Bool
forall a. IO a -> a
unsafePerformIO (IO Bool -> Bool) -> IO Bool -> Bool
forall a b. (a -> b) -> a -> b
$ do
    CPtr Regex -> (Ptr Regex -> IO Bool) -> IO Bool
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp ((Ptr Regex -> IO Bool) -> IO Bool)
-> (Ptr Regex -> IO Bool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
        PrimVector Word8 -> (BA# Word8 -> Int -> Int -> IO Bool) -> IO Bool
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe PrimVector Word8
bs ((BA# Word8 -> Int -> Int -> IO Bool) -> IO Bool)
-> (BA# Word8 -> Int -> Int -> IO Bool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ \ BA# Word8
ba# Int
s Int
l -> do
            CInt
r <- Ptr Regex -> BA# Word8 -> Int -> Int -> IO CInt
hs_re2_test Ptr Regex
p BA# Word8
ba# Int
s Int
l
            Bool -> IO Bool
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> IO Bool) -> Bool -> IO Bool
forall a b. (a -> b) -> a -> b
$! CInt
r CInt -> CInt -> Bool
forall a. Eq a => a -> a -> Bool
/= CInt
0

-- | Check if text matched regex pattern,
-- if so return matched part, all capturing groups(from @\\1@) and the text after matched part.
--
-- @Nothing@ indicate a non-matching capturing group, e.g.
--
-- >>> match (regex "(foo)|(bar)baz") "barbazbla"
-- >>> ("barbaz",[Nothing,Just "bar"], "bla")
--
match :: Regex -> T.Text -> (T.Text, [Maybe T.Text], T.Text)
{-# INLINABLE match #-}
match :: Regex -> Text -> (Text, [Maybe Text], Text)
match (Regex CPtr Regex
fp Int
n Text
_) t :: Text
t@(T.Text bs :: PrimVector Word8
bs@(V.PrimVector PrimArray Word8
ba Int
_ Int
_)) = IO (Text, [Maybe Text], Text) -> (Text, [Maybe Text], Text)
forall a. IO a -> a
unsafePerformIO (IO (Text, [Maybe Text], Text) -> (Text, [Maybe Text], Text))
-> IO (Text, [Maybe Text], Text) -> (Text, [Maybe Text], Text)
forall a b. (a -> b) -> a -> b
$ do
    CPtr Regex
-> (Ptr Regex -> IO (Text, [Maybe Text], Text))
-> IO (Text, [Maybe Text], Text)
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp ((Ptr Regex -> IO (Text, [Maybe Text], Text))
 -> IO (Text, [Maybe Text], Text))
-> (Ptr Regex -> IO (Text, [Maybe Text], Text))
-> IO (Text, [Maybe Text], Text)
forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
        PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (Text, [Maybe Text], Text))
-> IO (Text, [Maybe Text], Text)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe PrimVector Word8
bs ((BA# Word8 -> Int -> Int -> IO (Text, [Maybe Text], Text))
 -> IO (Text, [Maybe Text], Text))
-> (BA# Word8 -> Int -> Int -> IO (Text, [Maybe Text], Text))
-> IO (Text, [Maybe Text], Text)
forall a b. (a -> b) -> a -> b
$ \ BA# Word8
ba# Int
s Int
l -> do
            (PrimArray Int
starts, (PrimArray Int
lens, CInt
r)) <- Int
-> (MutableByteArray# RealWorld -> IO (PrimArray Int, CInt))
-> IO (PrimArray Int, (PrimArray Int, CInt))
forall a b.
Prim a =>
Int -> (MutableByteArray# RealWorld -> IO b) -> IO (PrimArray a, b)
allocPrimArrayUnsafe Int
n ((MutableByteArray# RealWorld -> IO (PrimArray Int, CInt))
 -> IO (PrimArray Int, (PrimArray Int, CInt)))
-> (MutableByteArray# RealWorld -> IO (PrimArray Int, CInt))
-> IO (PrimArray Int, (PrimArray Int, CInt))
forall a b. (a -> b) -> a -> b
$ \ MutableByteArray# RealWorld
p_starts ->
                Int
-> (MutableByteArray# RealWorld -> IO CInt)
-> IO (PrimArray Int, CInt)
forall a b.
Prim a =>
Int -> (MutableByteArray# RealWorld -> IO b) -> IO (PrimArray a, b)
allocPrimArrayUnsafe Int
n ((MutableByteArray# RealWorld -> IO CInt)
 -> IO (PrimArray Int, CInt))
-> (MutableByteArray# RealWorld -> IO CInt)
-> IO (PrimArray Int, CInt)
forall a b. (a -> b) -> a -> b
$ \ MutableByteArray# RealWorld
p_ends ->
                    Ptr Regex
-> BA# Word8
-> Int
-> Int
-> Int
-> MutableByteArray# RealWorld
-> MutableByteArray# RealWorld
-> IO CInt
hs_re2_match Ptr Regex
p BA# Word8
ba# Int
s Int
l Int
n MutableByteArray# RealWorld
p_starts MutableByteArray# RealWorld
p_ends
            if CInt
r CInt -> CInt -> Bool
forall a. Eq a => a -> a -> Bool
== CInt
0
            then (Text, [Maybe Text], Text) -> IO (Text, [Maybe Text], Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
T.empty, [], Text
t)
            else do
                let !s0 :: Int
s0 = PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
A.indexArr PrimArray Int
starts Int
0
                    !l0 :: Int
l0 = PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
A.indexArr PrimArray Int
lens Int
0
                    caps :: [Maybe Text]
caps = ((Int -> Maybe Text) -> [Int] -> [Maybe Text]
forall a b. (a -> b) -> [a] -> [b]
map (\ !Int
i ->
                        let !s' :: Int
s' = PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
A.indexArr PrimArray Int
starts Int
i
                            !l' :: Int
l' = PrimArray Int -> Int -> Int
forall (arr :: * -> *) a. Arr arr a => arr a -> Int -> a
A.indexArr PrimArray Int
lens Int
i
                        in if Int
l' Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== -Int
1
                            then Maybe Text
forall a. Maybe a
Nothing
                            else (Text -> Maybe Text
forall a. a -> Maybe a
Just (PrimVector Word8 -> Text
T.Text (PrimArray Word8 -> Int -> Int -> PrimVector Word8
forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
ba Int
s' Int
l')))) [Int
1..Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1])
                (Text, [Maybe Text], Text) -> IO (Text, [Maybe Text], Text)
forall (m :: * -> *) a. Monad m => a -> m a
return (PrimVector Word8 -> Text
T.Text (PrimArray Word8 -> Int -> Int -> PrimVector Word8
forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
ba Int
s0 Int
l0)
                       , [Maybe Text]
caps
                       , PrimVector Word8 -> Text
T.Text (PrimArray Word8 -> Int -> Int -> PrimVector Word8
forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
ba (Int
s0Int -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l0) (Int
sInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
lInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s0Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
l0)))

-- | Replace matched part in input with a rewrite pattern.
-- If no matched part found, return the original input.
--
-- >>> replace (regex "red") False "A red fox with red fur" "yellow"
-- >>> "A yellow fox with red fur"
-- >>> replace (regex "red") True  "A red fox with red fur" "yellow"
-- >>> "A yellow fox with yellow fur"
--
replace :: Regex
        -> Bool     -- ^ globally replace?
        -> T.Text   -- ^ input
        -> T.Text   -- ^ rewrite
        -> T.Text
{-# INLINABLE replace #-}
replace :: Regex -> Bool -> Text -> Text -> Text
replace (Regex CPtr Regex
fp Int
_ Text
_) Bool
g Text
inp Text
rew = PrimVector Word8 -> Text
T.Text (PrimVector Word8 -> Text)
-> (IO (PrimVector Word8) -> PrimVector Word8)
-> IO (PrimVector Word8)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO (PrimVector Word8) -> PrimVector Word8
forall a. IO a -> a
unsafePerformIO (IO (PrimVector Word8) -> Text) -> IO (PrimVector Word8) -> Text
forall a b. (a -> b) -> a -> b
$ do
    CPtr Regex
-> (Ptr Regex -> IO (PrimVector Word8)) -> IO (PrimVector Word8)
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp ((Ptr Regex -> IO (PrimVector Word8)) -> IO (PrimVector Word8))
-> (Ptr Regex -> IO (PrimVector Word8)) -> IO (PrimVector Word8)
forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
        PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> PrimVector Word8
T.getUTF8Bytes Text
inp) ((BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
 -> IO (PrimVector Word8))
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b. (a -> b) -> a -> b
$ \ BA# Word8
inpp Int
inpoff Int
inplen ->
            PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> PrimVector Word8
T.getUTF8Bytes Text
rew) ((BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
 -> IO (PrimVector Word8))
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b. (a -> b) -> a -> b
$ \ BA# Word8
rewp Int
rewoff Int
rewlen ->
                IO (Ptr StdString) -> IO (PrimVector Word8)
fromStdString ((if Bool
g then Ptr Regex
-> BA# Word8
-> Int
-> Int
-> BA# Word8
-> Int
-> Int
-> IO (Ptr StdString)
hs_re2_replace_g else Ptr Regex
-> BA# Word8
-> Int
-> Int
-> BA# Word8
-> Int
-> Int
-> IO (Ptr StdString)
hs_re2_replace)
                    Ptr Regex
p BA# Word8
inpp Int
inpoff Int
inplen BA# Word8
rewp Int
rewoff Int
rewlen)

-- | Extract capturing group to an extract pattern.
-- If no matched capturing group found, return an empty string.
--
-- >>> extract (regex "(\\d{4})-(\\d{2})-(\\d{2})") "Today is 2020-12-15" "month: \\2, date: \\3"
-- >>> "month: 12, date: 15"
--
extract :: Regex
        -> T.Text   -- ^ input
        -> T.Text   -- ^ extract
        -> T.Text
{-# INLINABLE extract #-}
extract :: Regex -> Text -> Text -> Text
extract (Regex CPtr Regex
fp Int
_ Text
_) Text
inp Text
rew = PrimVector Word8 -> Text
T.Text (PrimVector Word8 -> Text)
-> (IO (PrimVector Word8) -> PrimVector Word8)
-> IO (PrimVector Word8)
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO (PrimVector Word8) -> PrimVector Word8
forall a. IO a -> a
unsafePerformIO (IO (PrimVector Word8) -> Text) -> IO (PrimVector Word8) -> Text
forall a b. (a -> b) -> a -> b
$ do
    CPtr Regex
-> (Ptr Regex -> IO (PrimVector Word8)) -> IO (PrimVector Word8)
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp ((Ptr Regex -> IO (PrimVector Word8)) -> IO (PrimVector Word8))
-> (Ptr Regex -> IO (PrimVector Word8)) -> IO (PrimVector Word8)
forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
        PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> PrimVector Word8
T.getUTF8Bytes Text
inp) ((BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
 -> IO (PrimVector Word8))
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b. (a -> b) -> a -> b
$ \ BA# Word8
inpp Int
inpoff Int
inplen ->
            PrimVector Word8
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b.
Prim a =>
PrimVector a -> (BA# Word8 -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> PrimVector Word8
T.getUTF8Bytes Text
rew) ((BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
 -> IO (PrimVector Word8))
-> (BA# Word8 -> Int -> Int -> IO (PrimVector Word8))
-> IO (PrimVector Word8)
forall a b. (a -> b) -> a -> b
$ \ BA# Word8
rewp Int
rewoff Int
rewlen ->
                IO (Ptr StdString) -> IO (PrimVector Word8)
fromStdString (Ptr Regex
-> BA# Word8
-> Int
-> Int
-> BA# Word8
-> Int
-> Int
-> IO (Ptr StdString)
hs_re2_extract Ptr Regex
p BA# Word8
inpp Int
inpoff Int
inplen BA# Word8
rewp Int
rewoff Int
rewlen)

--------------------------------------------------------------------------------

foreign import ccall unsafe hs_re2_compile_pattern_default
    :: MBA# (Ptr Regex) -> BA# Word8 -> Int -> Int
    -> IO (Ptr Regex)

foreign import ccall unsafe hs_re2_compile_pattern
    :: MBA# (Ptr Regex)
    -> BA# Word8 -> Int -> Int
    -> CBool -- ^ posix_syntax
    -> CBool -- ^ longest_match
    -> Int64 -- ^ max_mem
    -> CBool -- ^ literal
    -> CBool -- ^ never_nl
    -> CBool -- ^ dot_nl
    -> CBool -- ^ never_capture
    -> CBool -- ^ case_sensitive
    -> CBool -- ^ perl_classes
    -> CBool -- ^ word_boundary
    -> CBool -- ^ one_line
    -> IO (Ptr Regex)

foreign import ccall unsafe "&hs_re2_delete_pattern" p_hs_re2_delete_pattern :: FunPtr (Ptr Regex -> IO ())

foreign import ccall unsafe hs_re2_ok :: Ptr Regex -> IO CInt
foreign import ccall unsafe hs_num_capture_groups :: Ptr Regex -> IO Int

foreign import ccall unsafe hs_re2_quote_meta :: BA# Word8 -> Int -> Int -> IO (Ptr StdString)

foreign import ccall unsafe hs_re2_match :: Ptr Regex
                                         -> BA# Word8   -- ^ input
                                         -> Int        -- ^ input offest
                                         -> Int        -- ^ input length
                                         -> Int         -- ^ capture num
                                         -> MBA# Int    -- ^ capture starts
                                         -> MBA# Int    -- ^ capture lens
                                         -> IO CInt        -- ^ 0 for failure, 1 for success

foreign import ccall unsafe hs_re2_test :: Ptr Regex
                                          -> BA# Word8   -- ^ input
                                          -> Int        -- ^ input offest
                                          -> Int        -- ^ input length
                                          -> IO CInt        -- ^ 0 for failure, 1 for success

foreign import ccall unsafe hs_re2_replace :: Ptr Regex
                                           -> BA# Word8   -- ^ input
                                           -> Int        -- ^ input offest
                                           -> Int        -- ^ input length
                                           -> BA# Word8   -- ^ rewrite
                                           -> Int        -- ^ rewrite offest
                                           -> Int        -- ^ rewrite length
                                           -> IO (Ptr StdString) -- ^ NULL for failure

foreign import ccall unsafe hs_re2_replace_g :: Ptr Regex
                                             -> BA# Word8   -- ^ input
                                             -> Int        -- ^ input offest
                                             -> Int        -- ^ input length
                                             -> BA# Word8   -- ^ rewrite
                                             -> Int        -- ^ rewrite offest
                                             -> Int        -- ^ rewrite length
                                             -> IO (Ptr StdString) -- ^ NULL for failure

foreign import ccall unsafe hs_re2_extract :: Ptr Regex
                                           -> BA# Word8   -- ^ input
                                           -> Int        -- ^ input offest
                                           -> Int        -- ^ input length
                                           -> BA# Word8   -- ^ rewrite
                                           -> Int        -- ^ rewrite offest
                                           -> Int        -- ^ rewrite length
                                           -> IO (Ptr StdString) -- ^ NULL for failure

foreign import ccall unsafe hs_re2_kDefaultMaxMem :: Int64