{-# LANGUAGE OverloadedLists   #-}
{-# LANGUAGE OverloadedStrings #-}

module Jacinda.Regex ( lazySplit
                     , splitBy
                     , defaultRurePtr
                     , isMatch'
                     , find'
                     , sub1, subs
                     , compileDefault
                     , substr
                     , findCapture
                     , captures'
                     , capturesIx
                     ) where

import           Control.Exception        (Exception, throwIO)
import           Control.Monad            ((<=<))
import qualified Data.ByteString          as BS
import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString.Lazy     as BSL
import qualified Data.Vector              as V
import           Foreign.C.Types          (CSize)
import           Foreign.ForeignPtr       (plusForeignPtr)
import           Regex.Rure               (RureFlags, RureMatch (..), RurePtr, captures, compile, find, findCaptures, isMatch, matches', rureDefaultFlags, rureFlagDotNL)
import           System.IO.Unsafe         (unsafeDupablePerformIO, unsafePerformIO)

-- https://docs.rs/regex/latest/regex/#perl-character-classes-unicode-friendly
defaultFs :: BS.ByteString
defaultFs :: ByteString
defaultFs = ByteString
"\\s+"

{-# NOINLINE defaultRurePtr #-}
defaultRurePtr :: RurePtr
defaultRurePtr :: RurePtr
defaultRurePtr = IO RurePtr -> RurePtr
forall a. IO a -> a
unsafePerformIO (IO RurePtr -> RurePtr) -> IO RurePtr -> RurePtr
forall a b. (a -> b) -> a -> b
$ Either String RurePtr -> IO RurePtr
forall a. Either String a -> IO a
yIO (Either String RurePtr -> IO RurePtr)
-> IO (Either String RurePtr) -> IO RurePtr
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< RureFlags -> ByteString -> IO (Either String RurePtr)
compile RureFlags
genFlags ByteString
defaultFs

genFlags :: RureFlags
genFlags :: RureFlags
genFlags = RureFlags
rureDefaultFlags RureFlags -> RureFlags -> RureFlags
forall a. Semigroup a => a -> a -> a
<> RureFlags
rureFlagDotNL -- in case they want to use a custom record separator

substr :: BS.ByteString -> Int -> Int -> BS.ByteString
substr :: ByteString -> Int -> Int -> ByteString
substr (BS.BS ForeignPtr Word8
fp Int
l) Int
begin Int
endϵ | Int
endϵ Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
begin = ForeignPtr Word8 -> Int -> ByteString
BS.BS (ForeignPtr Word8
fp ForeignPtr Word8 -> Int -> ForeignPtr Word8
forall a b. ForeignPtr a -> Int -> ForeignPtr b
`plusForeignPtr` Int
begin) (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
l Int
endϵInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
begin)
                               | Bool
otherwise = ByteString
"error: invalid substring indices."

captures' :: RurePtr -> BS.ByteString -> CSize -> [BS.ByteString]
captures' :: RurePtr -> ByteString -> CSize -> [ByteString]
captures' RurePtr
re haystack :: ByteString
haystack@(BS.BS ForeignPtr Word8
fp Int
_) CSize
ix = IO [ByteString] -> [ByteString]
forall a. IO a -> a
unsafeDupablePerformIO (IO [ByteString] -> [ByteString])
-> IO [ByteString] -> [ByteString]
forall a b. (a -> b) -> a -> b
$ (RureMatch -> ByteString) -> [RureMatch] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap RureMatch -> ByteString
go ([RureMatch] -> [ByteString]) -> IO [RureMatch] -> IO [ByteString]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RurePtr -> ByteString -> CSize -> IO [RureMatch]
captures RurePtr
re ByteString
haystack CSize
ix
    where go :: RureMatch -> ByteString
go (RureMatch CSize
s CSize
e) =
            let e' :: Int
e' = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
e
                s' :: Int
s' = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
s
                in ForeignPtr Word8 -> Int -> ByteString
BS.BS (ForeignPtr Word8
fp ForeignPtr Word8 -> Int -> ForeignPtr Word8
forall a b. ForeignPtr a -> Int -> ForeignPtr b
`plusForeignPtr` Int
s') (Int
e'Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s')

{-# NOINLINE capturesIx #-}
capturesIx :: RurePtr -> BS.ByteString -> CSize -> [RureMatch]
capturesIx :: RurePtr -> ByteString -> CSize -> [RureMatch]
capturesIx RurePtr
re ByteString
str CSize
n = IO [RureMatch] -> [RureMatch]
forall a. IO a -> a
unsafeDupablePerformIO (IO [RureMatch] -> [RureMatch]) -> IO [RureMatch] -> [RureMatch]
forall a b. (a -> b) -> a -> b
$ RurePtr -> ByteString -> CSize -> IO [RureMatch]
captures RurePtr
re ByteString
str CSize
n

{-# NOINLINE findCapture #-}
findCapture :: RurePtr -> BS.ByteString -> CSize -> Maybe BS.ByteString
findCapture :: RurePtr -> ByteString -> CSize -> Maybe ByteString
findCapture RurePtr
re haystack :: ByteString
haystack@(BS.BS ForeignPtr Word8
fp Int
_) CSize
ix = IO (Maybe ByteString) -> Maybe ByteString
forall a. IO a -> a
unsafeDupablePerformIO (IO (Maybe ByteString) -> Maybe ByteString)
-> IO (Maybe ByteString) -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ (RureMatch -> ByteString) -> Maybe RureMatch -> Maybe ByteString
forall a b. (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap RureMatch -> ByteString
go (Maybe RureMatch -> Maybe ByteString)
-> IO (Maybe RureMatch) -> IO (Maybe ByteString)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> RurePtr -> ByteString -> CSize -> CSize -> IO (Maybe RureMatch)
findCaptures RurePtr
re ByteString
haystack CSize
ix CSize
0
    where go :: RureMatch -> ByteString
go (RureMatch CSize
s CSize
e) =
            let e' :: Int
e' = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
e
                s' :: Int
s' = CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
s
                in ForeignPtr Word8 -> Int -> ByteString
BS.BS (ForeignPtr Word8
fp ForeignPtr Word8 -> Int -> ForeignPtr Word8
forall a b. ForeignPtr a -> Int -> ForeignPtr b
`plusForeignPtr` Int
s') (Int
e'Int -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s')

{-# NOINLINE subs #-}
subs :: RurePtr -> BS.ByteString -> BS.ByteString -> BS.ByteString
subs :: RurePtr -> ByteString -> ByteString -> ByteString
subs RurePtr
re ByteString
haystack = let ms :: [RureMatch]
ms = IO [RureMatch] -> [RureMatch]
forall a. IO a -> a
unsafeDupablePerformIO (IO [RureMatch] -> [RureMatch]) -> IO [RureMatch] -> [RureMatch]
forall a b. (a -> b) -> a -> b
$ RurePtr -> ByteString -> IO [RureMatch]
matches' RurePtr
re ByteString
haystack in Maybe RureMatch -> [RureMatch] -> ByteString -> ByteString
go Maybe RureMatch
forall a. Maybe a
Nothing [RureMatch]
ms
    where go :: Maybe RureMatch -> [RureMatch] -> ByteString -> ByteString
go Maybe RureMatch
_ [] ByteString
_                                         = ByteString
haystack
          go (Just (RureMatch CSize
_ CSize
pe)) ((RureMatch CSize
ms CSize
_):[RureMatch]
_) ByteString
_ | CSize
pe CSize -> CSize -> Bool
forall a. Ord a => a -> a -> Bool
> CSize
ms = String -> ByteString
forall a. HasCallStack => String -> a
error String
"Overlapping matches."
          go Maybe RureMatch
_ (m :: RureMatch
m@(RureMatch CSize
ms CSize
me):[RureMatch]
s) ByteString
substituend          = let next :: ByteString
next=Maybe RureMatch -> [RureMatch] -> ByteString -> ByteString
go (RureMatch -> Maybe RureMatch
forall a. a -> Maybe a
Just RureMatch
m) [RureMatch]
s ByteString
substituend in Int -> ByteString -> ByteString
BS.take (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
ms) ByteString
next ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
substituend ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
BS.drop (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
me) ByteString
next

sub1 :: RurePtr -> BS.ByteString -> BS.ByteString -> BS.ByteString
sub1 :: RurePtr -> ByteString -> ByteString -> ByteString
sub1 RurePtr
re ByteString
bs ByteString
ss =
    case RurePtr -> ByteString -> Maybe RureMatch
find' RurePtr
re ByteString
bs of
        Maybe RureMatch
Nothing              -> ByteString
bs
        Just (RureMatch CSize
s CSize
e) -> Int -> ByteString -> ByteString
BS.take (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
s) ByteString
bs ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
ss ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
BS.drop (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
e) ByteString
bs

{-# NOINLINE find' #-}
find' :: RurePtr -> BS.ByteString -> Maybe RureMatch
find' :: RurePtr -> ByteString -> Maybe RureMatch
find' RurePtr
re ByteString
str = IO (Maybe RureMatch) -> Maybe RureMatch
forall a. IO a -> a
unsafeDupablePerformIO (IO (Maybe RureMatch) -> Maybe RureMatch)
-> IO (Maybe RureMatch) -> Maybe RureMatch
forall a b. (a -> b) -> a -> b
$ RurePtr -> ByteString -> CSize -> IO (Maybe RureMatch)
find RurePtr
re ByteString
str CSize
0

lazySplit :: RurePtr -> BSL.ByteString -> [BS.ByteString]
lazySplit :: RurePtr -> ByteString -> [ByteString]
lazySplit RurePtr
rp ByteString
bs = let c :: [ByteString]
c=ByteString -> [ByteString]
BSL.toChunks ByteString
bs in Maybe ByteString -> [ByteString] -> [ByteString]
go Maybe ByteString
forall a. Maybe a
Nothing [ByteString]
c
        where go :: Maybe ByteString -> [ByteString] -> [ByteString]
go Maybe ByteString
Nothing []      = []
              go Maybe ByteString
Nothing (ByteString
c:[ByteString]
cs)  = let ss :: [ByteString]
ss=RurePtr -> ByteString -> [ByteString]
splitByA RurePtr
rp ByteString
c
                    in case [ByteString] -> Maybe ([ByteString], ByteString)
forall a. [a] -> Maybe ([a], a)
unsnoc [ByteString]
ss of
                        Just ([ByteString]
iss,ByteString
lss) -> [ByteString]
iss[ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++Maybe ByteString -> [ByteString] -> [ByteString]
go (ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
lss) [ByteString]
cs
                        Maybe ([ByteString], ByteString)
Nothing        -> Maybe ByteString -> [ByteString] -> [ByteString]
go Maybe ByteString
forall a. Maybe a
Nothing [ByteString]
cs
              go (Just ByteString
c) []     = let ss :: [ByteString]
ss=RurePtr -> ByteString -> [ByteString]
splitByA RurePtr
rp ByteString
c in [ByteString]
ss
              go (Just ByteString
e) (ByteString
c:[ByteString]
cs) = let ss :: [ByteString]
ss=RurePtr -> ByteString -> [ByteString]
splitByA RurePtr
rp (ByteString
eByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<>ByteString
c)
                    in case [ByteString] -> Maybe ([ByteString], ByteString)
forall a. [a] -> Maybe ([a], a)
unsnoc [ByteString]
ss of
                        Just ([ByteString]
iss,ByteString
lss) -> [ByteString]
iss[ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++Maybe ByteString -> [ByteString] -> [ByteString]
go (ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just ByteString
lss) [ByteString]
cs
                        Maybe ([ByteString], ByteString)
Nothing        -> Maybe ByteString -> [ByteString] -> [ByteString]
go Maybe ByteString
forall a. Maybe a
Nothing [ByteString]
cs

unsnoc :: [a] -> Maybe ([a], a)
unsnoc :: forall a. [a] -> Maybe ([a], a)
unsnoc = (a -> Maybe ([a], a) -> Maybe ([a], a))
-> Maybe ([a], a) -> [a] -> Maybe ([a], a)
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\a
x Maybe ([a], a)
acc -> ([a], a) -> Maybe ([a], a)
forall a. a -> Maybe a
Just (([a], a) -> Maybe ([a], a)) -> ([a], a) -> Maybe ([a], a)
forall a b. (a -> b) -> a -> b
$ case Maybe ([a], a)
acc of {Maybe ([a], a)
Nothing -> ([], a
x); Just ~([a]
a, a
b) -> (a
xa -> [a] -> [a]
forall a. a -> [a] -> [a]
:[a]
a, a
b)}) Maybe ([a], a)
forall a. Maybe a
Nothing

splitBy :: RurePtr -> BS.ByteString -> V.Vector BS.ByteString
splitBy :: RurePtr -> ByteString -> Vector ByteString
splitBy = ([ByteString] -> Vector ByteString
forall a. [a] -> Vector a
V.fromList ([ByteString] -> Vector ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> Vector ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((ByteString -> [ByteString]) -> ByteString -> Vector ByteString)
-> (RurePtr -> ByteString -> [ByteString])
-> RurePtr
-> ByteString
-> Vector ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. RurePtr -> ByteString -> [ByteString]
splitByA

{-# NOINLINE splitBy #-}
splitByA :: RurePtr
         -> BS.ByteString
         -> [BS.ByteString]
splitByA :: RurePtr -> ByteString -> [ByteString]
splitByA RurePtr
_ ByteString
"" = [ByteString]
forall a. Monoid a => a
mempty
splitByA RurePtr
re haystack :: ByteString
haystack@(BS.BS ForeignPtr Word8
fp Int
l) =
    [ForeignPtr Word8 -> Int -> ByteString
BS.BS (ForeignPtr Word8
fp ForeignPtr Word8 -> Int -> ForeignPtr Word8
forall a b. ForeignPtr a -> Int -> ForeignPtr b
`plusForeignPtr` Int
s) (Int
eInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
s) | (Int
s, Int
e) <- [(Int, Int)]
slicePairs]
    where ixes :: [RureMatch]
ixes = IO [RureMatch] -> [RureMatch]
forall a. IO a -> a
unsafeDupablePerformIO (IO [RureMatch] -> [RureMatch]) -> IO [RureMatch] -> [RureMatch]
forall a b. (a -> b) -> a -> b
$ RurePtr -> ByteString -> IO [RureMatch]
matches' RurePtr
re ByteString
haystack
          slicePairs :: [(Int, Int)]
slicePairs = case [RureMatch]
ixes of
                (RureMatch CSize
0 CSize
i:[RureMatch]
rms) -> Int -> [RureMatch] -> [(Int, Int)]
forall {t}. Num t => t -> [RureMatch] -> [(t, Int)]
mkMiddle (CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
i) [RureMatch]
rms
                [RureMatch]
rms                 -> Int -> [RureMatch] -> [(Int, Int)]
forall {t}. Num t => t -> [RureMatch] -> [(t, Int)]
mkMiddle Int
0 [RureMatch]
rms
          mkMiddle :: t -> [RureMatch] -> [(t, Int)]
mkMiddle t
begin' []        = [(t
begin', Int
l)]
          mkMiddle t
begin' (RureMatch
rm0:[RureMatch]
rms) = (t
begin', CSize -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (RureMatch -> CSize
start RureMatch
rm0)) (t, Int) -> [(t, Int)] -> [(t, Int)]
forall a. a -> [a] -> [a]
: t -> [RureMatch] -> [(t, Int)]
mkMiddle (CSize -> t
forall a b. (Integral a, Num b) => a -> b
fromIntegral (CSize -> t) -> CSize -> t
forall a b. (a -> b) -> a -> b
$ RureMatch -> CSize
end RureMatch
rm0) [RureMatch]
rms

isMatch' :: RurePtr
         -> BS.ByteString
         -> Bool
isMatch' :: RurePtr -> ByteString -> Bool
isMatch' RurePtr
re ByteString
haystack = IO Bool -> Bool
forall a. IO a -> a
unsafeDupablePerformIO (IO Bool -> Bool) -> IO Bool -> Bool
forall a b. (a -> b) -> a -> b
$ RurePtr -> ByteString -> CSize -> IO Bool
isMatch RurePtr
re ByteString
haystack CSize
0

compileDefault :: BS.ByteString -> RurePtr
compileDefault :: ByteString -> RurePtr
compileDefault = IO RurePtr -> RurePtr
forall a. IO a -> a
unsafeDupablePerformIO (IO RurePtr -> RurePtr)
-> (ByteString -> IO RurePtr) -> ByteString -> RurePtr
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Either String RurePtr -> IO RurePtr
forall a. Either String a -> IO a
yIO (Either String RurePtr -> IO RurePtr)
-> (ByteString -> IO (Either String RurePtr))
-> ByteString
-> IO RurePtr
forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< RureFlags -> ByteString -> IO (Either String RurePtr)
compile RureFlags
genFlags)

newtype RureExe = RegexCompile String

instance Show RureExe where show :: RureExe -> String
show (RegexCompile String
str) = String
str

instance Exception RureExe where

yIO :: Either String a -> IO a
yIO :: forall a. Either String a -> IO a
yIO = (String -> IO a) -> (a -> IO a) -> Either String a -> IO a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (RureExe -> IO a
forall e a. (HasCallStack, Exception e) => e -> IO a
throwIO (RureExe -> IO a) -> (String -> RureExe) -> String -> IO a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> RureExe
RegexCompile) a -> IO a
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure