{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE RankNTypes #-}
module Data.Bytes.Search
( findIndices
, replace
, isInfixOf
) where
import Prelude hiding (all, any, dropWhile, elem, foldl, foldr, length, map, null, readFile, replicate, takeWhile)
import Control.Monad.ST.Run (runByteArrayST, runPrimArrayST)
import Data.Bits (finiteBitSize, shiftL, (.&.), (.|.))
import Data.Bytes.Pure (length, unsafeHead, unsafeIndex)
import Data.Bytes.Types (Bytes (Bytes, array, offset))
import Data.Primitive (ByteArray, PrimArray)
import GHC.Exts (Int (I#))
import GHC.Word (Word32)
import qualified Data.Bytes.Byte as Byte
import qualified Data.Bytes.Pure as Pure
import qualified Data.Bytes.Types as Types
import qualified Data.Primitive as PM
replace ::
Bytes ->
Bytes ->
Bytes ->
Bytes
{-# NOINLINE replace #-}
replace :: Bytes -> Bytes -> Bytes -> Bytes
replace !Bytes
needle !Bytes
replacement !haystack :: Bytes
haystack@Bytes {$sel:array:Bytes :: Bytes -> ByteArray
array = ByteArray
haystackArray, $sel:offset:Bytes :: Bytes -> Int
offset = Int
haystackIndex, $sel:length:Bytes :: Bytes -> Int
length = Int
haystackLength}
| Bytes -> Int
Pure.length Bytes
needle Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [Char] -> Bytes
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.replace: needle of length zero"
| Bytes -> Int
Pure.length Bytes
haystack Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = Bytes
Pure.empty
| Bytes -> Int
Pure.length Bytes
needle Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1
, Bytes -> Int
Pure.length Bytes
replacement Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1 =
let !needle0 :: Word8
needle0 = Bytes -> Int -> Word8
unsafeIndex Bytes
needle Int
0
!replacement0 :: Word8
replacement0 = Bytes -> Int -> Word8
unsafeIndex Bytes
replacement Int
0
in (Word8 -> Word8) -> Bytes -> Bytes
Pure.map (\Word8
w -> if Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
needle0 then Word8
replacement0 else Word8
w) Bytes
haystack
| Bool
otherwise =
let !hp :: Word32
hp = Bytes -> Word32
rollingHash Bytes
needle
!ixs :: PrimArray Int
ixs = Int -> Word32 -> Bytes -> ByteArray -> Int -> Int -> PrimArray Int
findIndicesKarpRabin Int
0 Word32
hp Bytes
needle ByteArray
haystackArray Int
haystackIndex Int
haystackLength
in ByteArray -> Bytes
Pure.fromByteArray (PrimArray Int
-> Bytes -> Int -> ByteArray -> Int -> Int -> ByteArray
replaceIndices PrimArray Int
ixs Bytes
replacement (Bytes -> Int
Pure.length Bytes
needle) ByteArray
haystackArray Int
haystackIndex Int
haystackLength)
replaceIndices :: PrimArray Int -> Bytes -> Int -> ByteArray -> Int -> Int -> ByteArray
replaceIndices :: PrimArray Int
-> Bytes -> Int -> ByteArray -> Int -> Int -> ByteArray
replaceIndices !PrimArray Int
ixs !Bytes
replacement !Int
patLen !ByteArray
haystack !Int
ix0 !Int
len0 = (forall s. ST s ByteArray) -> ByteArray
runByteArrayST ((forall s. ST s ByteArray) -> ByteArray)
-> (forall s. ST s ByteArray) -> ByteArray
forall a b. (a -> b) -> a -> b
$ do
let !ixsLen :: Int
ixsLen = PrimArray Int -> Int
forall a. Prim a => PrimArray a -> Int
PM.sizeofPrimArray PrimArray Int
ixs
let !delta :: Int
delta = Bytes -> Int
Pure.length Bytes
replacement Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
patLen
MutableByteArray (PrimState (ST s))
dst <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray (Int
len0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ixsLen Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
delta)
let applyReplacement :: Int -> Int -> ST s ByteArray
applyReplacement !Int
ixIx !Int
prevSrcIx =
if Int
ixIx Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
ixsLen
then do
let !srcMatchIx :: Int
srcMatchIx = PrimArray Int -> Int -> Int
forall a. Prim a => PrimArray a -> Int -> a
PM.indexPrimArray PrimArray Int
ixs Int
ixIx
let !offset :: Int
offset = Int
ixIx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
delta
let !dstIx :: Int
dstIx = Int
srcMatchIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ix0
MutableByteArray (PrimState (ST s)) -> Int -> Bytes -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> Int -> Bytes -> m ()
Pure.unsafeCopy
MutableByteArray (PrimState (ST s))
dst
(Int
prevSrcIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ix0)
Bytes {$sel:array:Bytes :: ByteArray
array = ByteArray
haystack, $sel:offset:Bytes :: Int
offset = Int
prevSrcIx, $sel:length:Bytes :: Int
length = Int
srcMatchIx Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
prevSrcIx}
MutableByteArray (PrimState (ST s)) -> Int -> Bytes -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> Int -> Bytes -> m ()
Pure.unsafeCopy MutableByteArray (PrimState (ST s))
dst Int
dstIx Bytes
replacement
Int -> Int -> ST s ByteArray
applyReplacement (Int
ixIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
srcMatchIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
patLen)
else do
let !offset :: Int
offset = Int
ixIx Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
delta
MutableByteArray (PrimState (ST s)) -> Int -> Bytes -> ST s ()
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> Int -> Bytes -> m ()
Pure.unsafeCopy
MutableByteArray (PrimState (ST s))
dst
(Int
prevSrcIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
offset Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
ix0)
Bytes {$sel:array:Bytes :: ByteArray
array = ByteArray
haystack, $sel:offset:Bytes :: Int
offset = Int
prevSrcIx, $sel:length:Bytes :: Int
length = (Int
len0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ix0) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
prevSrcIx}
MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray (PrimState (ST s))
dst
Int -> Int -> ST s ByteArray
applyReplacement Int
0 Int
ix0
findIndices ::
Bytes ->
Bytes ->
PrimArray Int
findIndices :: Bytes -> Bytes -> PrimArray Int
findIndices Bytes
needle Bytes {ByteArray
$sel:array:Bytes :: Bytes -> ByteArray
array :: ByteArray
array, $sel:offset:Bytes :: Bytes -> Int
offset = Int
off, $sel:length:Bytes :: Bytes -> Int
length = Int
len}
| Int
needleLen Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = [Char] -> PrimArray Int
forall a. [Char] -> a
errorWithoutStackTrace [Char]
"Data.Bytes.findIndices: needle with length zero"
| Int
len Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 = PrimArray Int
forall a. Monoid a => a
mempty
| Bool
otherwise =
let !hp :: Word32
hp = Bytes -> Word32
rollingHash Bytes
needle
in Int -> Word32 -> Bytes -> ByteArray -> Int -> Int -> PrimArray Int
findIndicesKarpRabin (Int -> Int
forall a. Num a => a -> a
negate Int
off) Word32
hp Bytes
needle ByteArray
array Int
off Int
len
where
needleLen :: Int
needleLen = Bytes -> Int
Pure.length Bytes
needle
findIndicesKarpRabin ::
Int ->
Word32 ->
Bytes ->
ByteArray ->
Int ->
Int ->
PrimArray Int
findIndicesKarpRabin :: Int -> Word32 -> Bytes -> ByteArray -> Int -> Int -> PrimArray Int
findIndicesKarpRabin !Int
ixModifier !Word32
hp !Bytes
pat !ByteArray
haystack !Int
ix0 !Int
len0 = (forall s. ST s (PrimArray Int)) -> PrimArray Int
forall a. (forall s. ST s (PrimArray a)) -> PrimArray a
runPrimArrayST ((forall s. ST s (PrimArray Int)) -> PrimArray Int)
-> (forall s. ST s (PrimArray Int)) -> PrimArray Int
forall a b. (a -> b) -> a -> b
$ do
let dstLen :: Int
dstLen = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Integral a => a -> a -> a
quot Int
len0 (Bytes -> Int
Pure.length Bytes
pat)
MutablePrimArray (PrimState (ST s)) Int
dst <- Int -> ST s (MutablePrimArray (PrimState (ST s)) Int)
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
Int -> m (MutablePrimArray (PrimState m) a)
PM.newPrimArray Int
dstLen
let go :: Int -> Int -> Int -> ST s (PrimArray Int)
go !Int
ix !Int
len !Int
ixIx = case Word32 -> Bytes -> Bytes -> Int
karpRabin Word32
hp Bytes
pat Bytes {$sel:array:Bytes :: ByteArray
array = ByteArray
haystack, $sel:offset:Bytes :: Int
offset = Int
ix, $sel:length:Bytes :: Int
length = Int
len} of
(-1) -> do
MutablePrimArray (PrimState (ST s)) Int -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Prim a) =>
MutablePrimArray (PrimState m) a -> Int -> m ()
PM.shrinkMutablePrimArray MutablePrimArray (PrimState (ST s)) Int
dst Int
ixIx
MutablePrimArray (PrimState (ST s)) Int -> ST s (PrimArray Int)
forall (m :: * -> *) a.
PrimMonad m =>
MutablePrimArray (PrimState m) a -> m (PrimArray a)
PM.unsafeFreezePrimArray MutablePrimArray (PrimState (ST s)) Int
dst
Int
skipCount -> do
let !advancement :: Int
advancement = Int
skipCount Int -> Int -> Int
forall a. Num a => a -> a -> a
- Bytes -> Int
Pure.length Bytes
pat
let !advancement' :: Int
advancement' = Int
advancement Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Bytes -> Int
Pure.length Bytes
pat
MutablePrimArray (PrimState (ST s)) Int -> Int -> Int -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutablePrimArray (PrimState m) a -> Int -> a -> m ()
PM.writePrimArray MutablePrimArray (PrimState (ST s)) Int
dst Int
ixIx (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
advancement Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
ixModifier)
let !ix' :: Int
ix' = Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
advancement'
Int -> Int -> Int -> ST s (PrimArray Int)
go Int
ix' (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
advancement') (Int
ixIx Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Int -> Int -> Int -> ST s (PrimArray Int)
go Int
ix0 Int
len0 Int
0
breakSubstring ::
Bytes ->
Bytes ->
Int
breakSubstring :: Bytes -> Bytes -> Int
breakSubstring !Bytes
pat !haystack :: Bytes
haystack@(Bytes ByteArray
_ Int
off0 Int
_) =
case Int
lp of
Int
0 -> Int
0
Int
1 -> case Word8 -> Bytes -> Int#
Byte.elemIndexLoop# (Bytes -> Word8
unsafeHead Bytes
pat) Bytes
haystack of
(Int#
-1#) -> (-Int
1)
Int#
off -> Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int# -> Int
I# Int#
off) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
off0
Int
_ ->
if Int
lp Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
8 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Word -> Int
forall b. FiniteBits b => b -> Int
finiteBitSize (Word
0 :: Word)
then Bytes -> Int
shift Bytes
haystack
else Word32 -> Bytes -> Bytes -> Int
karpRabin (Bytes -> Word32
rollingHash Bytes
pat) Bytes
pat Bytes
haystack
where
lp :: Int
lp = Bytes -> Int
length Bytes
pat
{-# INLINE shift #-}
shift :: Bytes -> Int
shift :: Bytes -> Int
shift !Bytes
src
| Bytes -> Int
length Bytes
src Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lp = (-Int
1)
| Bool
otherwise = Word -> Int -> Int
search (Bytes -> Word
intoWord (Bytes -> Word) -> Bytes -> Word
forall a b. (a -> b) -> a -> b
$ Int -> Bytes -> Bytes
Pure.unsafeTake Int
lp Bytes
src) Int
lp
where
intoWord :: Bytes -> Word
intoWord :: Bytes -> Word
intoWord = (Word -> Word8 -> Word) -> Word -> Bytes -> Word
forall a. (a -> Word8 -> a) -> a -> Bytes -> a
Pure.foldl' (\Word
w Word8
b -> (Word
w Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b) Word
0
wp :: Word
wp = Bytes -> Word
intoWord Bytes
pat
mask :: Word
mask = (Word
1 Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`shiftL` (Int
8 Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
lp)) Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
1
search :: Word -> Int -> Int
search :: Word -> Int -> Int
search !Word
w !Int
i
| Word
w Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
wp = Int
i
| Bytes -> Int
length Bytes
src Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
i = (-Int
1)
| Bool
otherwise = Word -> Int -> Int
search Word
w' (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
where
b :: Word
b = Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Bytes -> Int -> Word8
Pure.unsafeIndex Bytes
src Int
i)
w' :: Word
w' = Word
mask Word -> Word -> Word
forall a. Bits a => a -> a -> a
.&. ((Word
w Word -> Int -> Word
forall a. Bits a => a -> Int -> a
`shiftL` Int
8) Word -> Word -> Word
forall a. Bits a => a -> a -> a
.|. Word
b)
rollingHash :: Bytes -> Word32
{-# INLINE rollingHash #-}
rollingHash :: Bytes -> Word32
rollingHash = (Word32 -> Word8 -> Word32) -> Word32 -> Bytes -> Word32
forall a. (a -> Word8 -> a) -> a -> Bytes -> a
Pure.foldl' (\Word32
h Word8
b -> Word32
h Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
hashKey Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b) Word32
0
hashKey :: Word32
{-# INLINE hashKey #-}
hashKey :: Word32
hashKey = Word32
2891336453
karpRabin :: Word32 -> Bytes -> Bytes -> Int
karpRabin :: Word32 -> Bytes -> Bytes -> Int
karpRabin !Word32
hp !Bytes
pat !Bytes
src
| Bytes -> Int
length Bytes
src Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
lp = (-Int
1)
| Bool
otherwise = Word32 -> Int -> Int
search (Bytes -> Word32
rollingHash (Bytes -> Word32) -> Bytes -> Word32
forall a b. (a -> b) -> a -> b
$ Int -> Bytes -> Bytes
Pure.unsafeTake Int
lp Bytes
src) Int
lp
where
lp :: Int
!lp :: Int
lp = Bytes -> Int
Pure.length Bytes
pat
m :: Word32
!m :: Word32
m = Word32
hashKey Word32 -> Int -> Word32
forall a b. (Num a, Integral b) => a -> b -> a
^ Int
lp
get :: Int -> Word32
get :: Int -> Word32
get !Int
ix = Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Bytes -> Int -> Word8
Pure.unsafeIndex Bytes
src Int
ix)
search :: Word32 -> Int -> Int
search !Word32
hs !Int
i
| Word32
hp Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
hs Bool -> Bool -> Bool
&& Bytes -> Bytes -> Bool
eqBytesNoShortCut Bytes
pat (Int -> Bytes -> Bytes
Pure.unsafeTake Int
lp (Int -> Bytes -> Bytes
Pure.unsafeDrop (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
lp) Bytes
src)) = Int
i
| Bytes -> Int
length Bytes
src Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
i = (-Int
1)
| Bool
otherwise = Word32 -> Int -> Int
search Word32
hs' (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
where
hs' :: Word32
hs' =
Word32
hs Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Word32
hashKey
Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Int -> Word32
get Int
i
Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
- Word32
m Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
* Int -> Word32
get (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
lp)
isInfixOf ::
Bytes ->
Bytes ->
Bool
isInfixOf :: Bytes -> Bytes -> Bool
isInfixOf Bytes
p Bytes
s = Bytes -> Bool
Pure.null Bytes
p Bool -> Bool -> Bool
|| Bytes -> Bytes -> Int
breakSubstring Bytes
p Bytes
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0
eqBytesNoShortCut :: Bytes -> Bytes -> Bool
{-# INLINE eqBytesNoShortCut #-}
eqBytesNoShortCut :: Bytes -> Bytes -> Bool
eqBytesNoShortCut (Bytes ByteArray
arr1 Int
off1 Int
len1) (Bytes ByteArray
arr2 Int
off2 Int
_) =
ByteArray -> Int -> ByteArray -> Int -> Int -> Ordering
PM.compareByteArrays ByteArray
arr1 Int
off1 ByteArray
arr2 Int
off2 Int
len1 Ordering -> Ordering -> Bool
forall a. Eq a => a -> a -> Bool
== Ordering
EQ