module Z.Data.Text.Regex
(
Regex, regex, RegexOpts(..), defaultRegexOpts, regexOpts
, escape, regexCaptureNum, regexPattern
, RegexException(..)
, test
, match
, replace
, extract
) where
import Control.Exception
import Control.Monad
import Data.Int
import Data.Word
import GHC.Stack
import GHC.Generics
import Foreign.Marshal.Utils (fromBool)
import System.IO.Unsafe
import qualified Z.Data.Text.Base as T
import qualified Z.Data.Text.Print as T
import qualified Z.Data.Vector.Base as V
import qualified Z.Data.Array as A
import Z.Foreign.CPtr
import Z.Foreign
data Regex = Regex
{ Regex -> CPtr Regex
regexPtr :: {-# UNPACK #-} !(CPtr Regex)
, Regex -> Int
regexCaptureNum :: {-# UNPACK #-} !Int
, Regex -> Text
regexPattern :: T.Text
} deriving (Int -> Regex -> ShowS
[Regex] -> ShowS
Regex -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Regex] -> ShowS
$cshowList :: [Regex] -> ShowS
show :: Regex -> String
$cshow :: Regex -> String
showsPrec :: Int -> Regex -> ShowS
$cshowsPrec :: Int -> Regex -> ShowS
Show, forall x. Rep Regex x -> Regex
forall x. Regex -> Rep Regex x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Regex x -> Regex
$cfrom :: forall x. Regex -> Rep Regex x
Generic)
deriving anyclass Int -> Regex -> Builder ()
forall a. (Int -> a -> Builder ()) -> Print a
toUTF8BuilderP :: Int -> Regex -> Builder ()
$ctoUTF8BuilderP :: Int -> Regex -> Builder ()
T.Print
data RegexOpts = RegexOpts
{ RegexOpts -> Bool
posix_syntax :: Bool
, RegexOpts -> Bool
longest_match :: Bool
, RegexOpts -> Int64
max_mem :: {-# UNPACK #-} !Int64
, RegexOpts -> Bool
literal :: Bool
, RegexOpts -> Bool
never_nl :: Bool
, RegexOpts -> Bool
dot_nl :: Bool
, RegexOpts -> Bool
never_capture :: Bool
, RegexOpts -> Bool
case_sensitive :: Bool
, RegexOpts -> Bool
perl_classes :: Bool
, RegexOpts -> Bool
word_boundary :: Bool
, RegexOpts -> Bool
one_line :: Bool
} deriving (RegexOpts -> RegexOpts -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: RegexOpts -> RegexOpts -> Bool
$c/= :: RegexOpts -> RegexOpts -> Bool
== :: RegexOpts -> RegexOpts -> Bool
$c== :: RegexOpts -> RegexOpts -> Bool
Eq, Eq RegexOpts
RegexOpts -> RegexOpts -> Bool
RegexOpts -> RegexOpts -> Ordering
RegexOpts -> RegexOpts -> RegexOpts
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: RegexOpts -> RegexOpts -> RegexOpts
$cmin :: RegexOpts -> RegexOpts -> RegexOpts
max :: RegexOpts -> RegexOpts -> RegexOpts
$cmax :: RegexOpts -> RegexOpts -> RegexOpts
>= :: RegexOpts -> RegexOpts -> Bool
$c>= :: RegexOpts -> RegexOpts -> Bool
> :: RegexOpts -> RegexOpts -> Bool
$c> :: RegexOpts -> RegexOpts -> Bool
<= :: RegexOpts -> RegexOpts -> Bool
$c<= :: RegexOpts -> RegexOpts -> Bool
< :: RegexOpts -> RegexOpts -> Bool
$c< :: RegexOpts -> RegexOpts -> Bool
compare :: RegexOpts -> RegexOpts -> Ordering
$ccompare :: RegexOpts -> RegexOpts -> Ordering
Ord, Int -> RegexOpts -> ShowS
[RegexOpts] -> ShowS
RegexOpts -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexOpts] -> ShowS
$cshowList :: [RegexOpts] -> ShowS
show :: RegexOpts -> String
$cshow :: RegexOpts -> String
showsPrec :: Int -> RegexOpts -> ShowS
$cshowsPrec :: Int -> RegexOpts -> ShowS
Show, forall x. Rep RegexOpts x -> RegexOpts
forall x. RegexOpts -> Rep RegexOpts x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep RegexOpts x -> RegexOpts
$cfrom :: forall x. RegexOpts -> Rep RegexOpts x
Generic)
deriving anyclass Int -> RegexOpts -> Builder ()
forall a. (Int -> a -> Builder ()) -> Print a
toUTF8BuilderP :: Int -> RegexOpts -> Builder ()
$ctoUTF8BuilderP :: Int -> RegexOpts -> Builder ()
T.Print
defaultRegexOpts :: RegexOpts
defaultRegexOpts :: RegexOpts
defaultRegexOpts = Bool
-> Bool
-> Int64
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> Bool
-> RegexOpts
RegexOpts
Bool
False Bool
False Int64
hs_re2_kDefaultMaxMem
Bool
False Bool
False Bool
False Bool
False
Bool
True Bool
False Bool
False Bool
False
data RegexException = InvalidRegexPattern T.Text CallStack deriving Int -> RegexException -> ShowS
[RegexException] -> ShowS
RegexException -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [RegexException] -> ShowS
$cshowList :: [RegexException] -> ShowS
show :: RegexException -> String
$cshow :: RegexException -> String
showsPrec :: Int -> RegexException -> ShowS
$cshowsPrec :: Int -> RegexException -> ShowS
Show
instance Exception RegexException
regex :: HasCallStack => T.Text -> Regex
{-# NOINLINE regex #-}
regex :: HasCallStack => Text -> Regex
regex Text
t = forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
(CPtr Regex
cp, Ptr Regex
r) <- forall r a b.
(MutableByteArray# RealWorld -> IO r)
-> FunPtr (Ptr a -> IO b) -> IO (CPtr a, r)
newCPtrUnsafe (\ MutableByteArray# RealWorld
mba# ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe
(Text -> Bytes
T.getUTF8Bytes Text
t)
(MutableByteArray# RealWorld
-> BA# a -> Int -> Int -> IO (Ptr Regex)
hs_re2_compile_pattern_default MutableByteArray# RealWorld
mba#))
FunPtr (Ptr Regex -> IO ())
p_hs_re2_delete_pattern
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Ptr Regex
r forall a. Eq a => a -> a -> Bool
== forall a. Ptr a
nullPtr) (forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t HasCallStack => CallStack
callStack))
CInt
ok <- Ptr Regex -> IO CInt
hs_re2_ok Ptr Regex
r
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (CInt
ok forall a. Eq a => a -> a -> Bool
== CInt
0) (forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t HasCallStack => CallStack
callStack))
Int
n <- forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
cp Ptr Regex -> IO Int
hs_num_capture_groups
forall (m :: * -> *) a. Monad m => a -> m a
return (CPtr Regex -> Int -> Text -> Regex
Regex CPtr Regex
cp Int
n Text
t)
regexOpts :: HasCallStack => RegexOpts -> T.Text -> Regex
{-# NOINLINE regexOpts #-}
regexOpts :: HasCallStack => RegexOpts -> Text -> Regex
regexOpts RegexOpts{Bool
Int64
one_line :: Bool
word_boundary :: Bool
perl_classes :: Bool
case_sensitive :: Bool
never_capture :: Bool
dot_nl :: Bool
never_nl :: Bool
literal :: Bool
max_mem :: Int64
longest_match :: Bool
posix_syntax :: Bool
one_line :: RegexOpts -> Bool
word_boundary :: RegexOpts -> Bool
perl_classes :: RegexOpts -> Bool
case_sensitive :: RegexOpts -> Bool
never_capture :: RegexOpts -> Bool
dot_nl :: RegexOpts -> Bool
never_nl :: RegexOpts -> Bool
literal :: RegexOpts -> Bool
max_mem :: RegexOpts -> Int64
longest_match :: RegexOpts -> Bool
posix_syntax :: RegexOpts -> Bool
..} Text
t = forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
(CPtr Regex
cp, Ptr Regex
r) <- forall r a b.
(MutableByteArray# RealWorld -> IO r)
-> FunPtr (Ptr a -> IO b) -> IO (CPtr a, r)
newCPtrUnsafe ( \ MutableByteArray# RealWorld
mba# ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> Bytes
T.getUTF8Bytes Text
t) forall a b. (a -> b) -> a -> b
$ \ BA# a
p Int
o Int
l ->
MutableByteArray# RealWorld
-> BA# a
-> Int
-> Int
-> CBool
-> CBool
-> Int64
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> IO (Ptr Regex)
hs_re2_compile_pattern MutableByteArray# RealWorld
mba# BA# a
p Int
o Int
l
(forall a. Num a => Bool -> a
fromBool Bool
posix_syntax )
(forall a. Num a => Bool -> a
fromBool Bool
longest_match )
Int64
max_mem
(forall a. Num a => Bool -> a
fromBool Bool
literal )
(forall a. Num a => Bool -> a
fromBool Bool
never_nl )
(forall a. Num a => Bool -> a
fromBool Bool
dot_nl )
(forall a. Num a => Bool -> a
fromBool Bool
never_capture )
(forall a. Num a => Bool -> a
fromBool Bool
case_sensitive)
(forall a. Num a => Bool -> a
fromBool Bool
perl_classes )
(forall a. Num a => Bool -> a
fromBool Bool
word_boundary )
(forall a. Num a => Bool -> a
fromBool Bool
one_line ))
FunPtr (Ptr Regex -> IO ())
p_hs_re2_delete_pattern
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Ptr Regex
r forall a. Eq a => a -> a -> Bool
== forall a. Ptr a
nullPtr) (forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t HasCallStack => CallStack
callStack))
CInt
ok <- Ptr Regex -> IO CInt
hs_re2_ok Ptr Regex
r
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (CInt
ok forall a. Eq a => a -> a -> Bool
== CInt
0) (forall e a. Exception e => e -> IO a
throwIO (Text -> CallStack -> RegexException
InvalidRegexPattern Text
t HasCallStack => CallStack
callStack))
Int
n <- forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
cp Ptr Regex -> IO Int
hs_num_capture_groups
forall (m :: * -> *) a. Monad m => a -> m a
return (CPtr Regex -> Int -> Text -> Regex
Regex CPtr Regex
cp Int
n Text
t)
escape :: T.Text -> T.Text
{-# INLINABLE escape #-}
escape :: Text -> Text
escape Text
t = Bytes -> Text
T.Text forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. IO a -> a
unsafePerformIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO (Ptr StdString) -> IO Bytes
fromStdString forall a b. (a -> b) -> a -> b
$
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> Bytes
T.getUTF8Bytes Text
t) BA# a -> Int -> Int -> IO (Ptr StdString)
hs_re2_quote_meta
test :: Regex -> T.Text -> Bool
{-# INLINABLE test #-}
test :: Regex -> Text -> Bool
test (Regex CPtr Regex
fp Int
_ Text
_) (T.Text Bytes
bs) = forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe Bytes
bs forall a b. (a -> b) -> a -> b
$ \ BA# a
ba# Int
s Int
l -> do
CInt
r <- Ptr Regex -> BA# a -> Int -> Int -> IO CInt
hs_re2_test Ptr Regex
p BA# a
ba# Int
s Int
l
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! CInt
r forall a. Eq a => a -> a -> Bool
/= CInt
0
match :: Regex -> T.Text -> (T.Text, [Maybe T.Text], T.Text)
{-# INLINABLE match #-}
match :: Regex -> Text -> (Text, [Maybe Text], Text)
match (Regex CPtr Regex
fp Int
n Text
_) t :: Text
t@(T.Text bs :: Bytes
bs@(V.PrimVector PrimArray Word8
ba Int
_ Int
_)) = forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe Bytes
bs forall a b. (a -> b) -> a -> b
$ \ BA# a
ba# Int
s Int
l -> do
(PrimArray Int
starts, (PrimArray Int
lens, CInt
r)) <- forall a b.
Prim a =>
Int -> (MutableByteArray# RealWorld -> IO b) -> IO (PrimArray a, b)
allocPrimArrayUnsafe Int
n forall a b. (a -> b) -> a -> b
$ \ MutableByteArray# RealWorld
p_starts ->
forall a b.
Prim a =>
Int -> (MutableByteArray# RealWorld -> IO b) -> IO (PrimArray a, b)
allocPrimArrayUnsafe Int
n forall a b. (a -> b) -> a -> b
$ \ MutableByteArray# RealWorld
p_ends ->
Ptr Regex
-> BA# a
-> Int
-> Int
-> Int
-> MutableByteArray# RealWorld
-> MutableByteArray# RealWorld
-> IO CInt
hs_re2_match Ptr Regex
p BA# a
ba# Int
s Int
l Int
n MutableByteArray# RealWorld
p_starts MutableByteArray# RealWorld
p_ends
if CInt
r forall a. Eq a => a -> a -> Bool
== CInt
0
then forall (m :: * -> *) a. Monad m => a -> m a
return (Text
T.empty, [], Text
t)
else do
let !s0 :: Int
s0 = forall (arr :: * -> *) a.
(Arr arr a, HasCallStack) =>
arr a -> Int -> a
A.indexArr PrimArray Int
starts Int
0
!l0 :: Int
l0 = forall (arr :: * -> *) a.
(Arr arr a, HasCallStack) =>
arr a -> Int -> a
A.indexArr PrimArray Int
lens Int
0
caps :: [Maybe Text]
caps = (forall a b. (a -> b) -> [a] -> [b]
map (\ !Int
i ->
let !s' :: Int
s' = forall (arr :: * -> *) a.
(Arr arr a, HasCallStack) =>
arr a -> Int -> a
A.indexArr PrimArray Int
starts Int
i
!l' :: Int
l' = forall (arr :: * -> *) a.
(Arr arr a, HasCallStack) =>
arr a -> Int -> a
A.indexArr PrimArray Int
lens Int
i
in if Int
l' forall a. Eq a => a -> a -> Bool
== -Int
1
then forall a. Maybe a
Nothing
else (forall a. a -> Maybe a
Just (Bytes -> Text
T.Text (forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
ba Int
s' Int
l')))) [Int
1..Int
nforall a. Num a => a -> a -> a
-Int
1])
forall (m :: * -> *) a. Monad m => a -> m a
return (Bytes -> Text
T.Text (forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
ba Int
s0 Int
l0)
, [Maybe Text]
caps
, Bytes -> Text
T.Text (forall a. PrimArray a -> Int -> Int -> PrimVector a
V.PrimVector PrimArray Word8
ba (Int
s0forall a. Num a => a -> a -> a
+Int
l0) (Int
sforall a. Num a => a -> a -> a
+Int
lforall a. Num a => a -> a -> a
-Int
s0forall a. Num a => a -> a -> a
-Int
l0)))
replace :: Regex
-> Bool
-> T.Text
-> T.Text
-> T.Text
{-# INLINABLE replace #-}
replace :: Regex -> Bool -> Text -> Text -> Text
replace (Regex CPtr Regex
fp Int
_ Text
_) Bool
g Text
inp Text
rew = Bytes -> Text
T.Text forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> Bytes
T.getUTF8Bytes Text
inp) forall a b. (a -> b) -> a -> b
$ \ BA# a
inpp Int
inpoff Int
inplen ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> Bytes
T.getUTF8Bytes Text
rew) forall a b. (a -> b) -> a -> b
$ \ BA# a
rewp Int
rewoff Int
rewlen ->
IO (Ptr StdString) -> IO Bytes
fromStdString ((if Bool
g then Ptr Regex
-> BA# a -> Int -> Int -> BA# a -> Int -> Int -> IO (Ptr StdString)
hs_re2_replace_g else Ptr Regex
-> BA# a -> Int -> Int -> BA# a -> Int -> Int -> IO (Ptr StdString)
hs_re2_replace)
Ptr Regex
p BA# a
inpp Int
inpoff Int
inplen BA# a
rewp Int
rewoff Int
rewlen)
extract :: Regex
-> T.Text
-> T.Text
-> T.Text
{-# INLINABLE extract #-}
(Regex CPtr Regex
fp Int
_ Text
_) Text
inp Text
rew = Bytes -> Text
T.Text forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
forall a b. CPtr a -> (Ptr a -> IO b) -> IO b
withCPtr CPtr Regex
fp forall a b. (a -> b) -> a -> b
$ \ Ptr Regex
p ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> Bytes
T.getUTF8Bytes Text
inp) forall a b. (a -> b) -> a -> b
$ \ BA# a
inpp Int
inpoff Int
inplen ->
forall a b.
Prim a =>
PrimVector a -> (BA# a -> Int -> Int -> IO b) -> IO b
withPrimVectorUnsafe (Text -> Bytes
T.getUTF8Bytes Text
rew) forall a b. (a -> b) -> a -> b
$ \ BA# a
rewp Int
rewoff Int
rewlen ->
IO (Ptr StdString) -> IO Bytes
fromStdString (Ptr Regex
-> BA# a -> Int -> Int -> BA# a -> Int -> Int -> IO (Ptr StdString)
hs_re2_extract Ptr Regex
p BA# a
inpp Int
inpoff Int
inplen BA# a
rewp Int
rewoff Int
rewlen)
foreign import ccall unsafe hs_re2_compile_pattern_default
:: MBA# (Ptr Regex) -> BA# Word8 -> Int -> Int
-> IO (Ptr Regex)
foreign import ccall unsafe hs_re2_compile_pattern
:: MBA# (Ptr Regex)
-> BA# Word8 -> Int -> Int
-> CBool
-> CBool
-> Int64
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> CBool
-> IO (Ptr Regex)
foreign import ccall unsafe "&hs_re2_delete_pattern" p_hs_re2_delete_pattern :: FunPtr (Ptr Regex -> IO ())
foreign import ccall unsafe hs_re2_ok :: Ptr Regex -> IO CInt
foreign import ccall unsafe hs_num_capture_groups :: Ptr Regex -> IO Int
foreign import ccall unsafe hs_re2_quote_meta :: BA# Word8 -> Int -> Int -> IO (Ptr StdString)
foreign import ccall unsafe hs_re2_match :: Ptr Regex
-> BA# Word8
-> Int
-> Int
-> Int
-> MBA# Int
-> MBA# Int
-> IO CInt
foreign import ccall unsafe hs_re2_test :: Ptr Regex
-> BA# Word8
-> Int
-> Int
-> IO CInt
foreign import ccall unsafe hs_re2_replace :: Ptr Regex
-> BA# Word8
-> Int
-> Int
-> BA# Word8
-> Int
-> Int
-> IO (Ptr StdString)
foreign import ccall unsafe hs_re2_replace_g :: Ptr Regex
-> BA# Word8
-> Int
-> Int
-> BA# Word8
-> Int
-> Int
-> IO (Ptr StdString)
foreign import ccall unsafe :: Ptr Regex
-> BA# Word8
-> Int
-> Int
-> BA# Word8
-> Int
-> Int
-> IO (Ptr StdString)
foreign import ccall unsafe hs_re2_kDefaultMaxMem :: Int64