{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.Text.Utf8.BoyerMoore.Automaton
( Automaton
, CaseSensitivity (..)
, CodeUnitIndex (..)
, Next (..)
, buildAutomaton
, patternLength
, patternText
, runText
) where
import Prelude hiding (length)
import Control.DeepSeq (NFData)
import Control.Monad (when)
import Control.Monad.ST (runST)
import Data.Hashable (Hashable (..), Hashed, hashed, unhashed)
import GHC.Generics (Generic)
#if defined(HAS_AESON)
import qualified Data.Aeson as AE
#endif
import Data.Text.AhoCorasick.Automaton (Next (..))
import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf8 (CodeUnit, CodeUnitIndex (..), Text)
import Data.TypedByteArray (Prim, TypedByteArray)
import qualified Data.Text.Utf8 as Utf8
import qualified Data.TypedByteArray as TBA
data Automaton = Automaton
{ Automaton -> Hashed Text
automatonPattern :: Hashed Text
, Automaton -> SuffixTable
automatonSuffixTable :: SuffixTable
, Automaton -> BadCharTable
automatonBadCharTable :: BadCharTable
}
deriving stock ((forall x. Automaton -> Rep Automaton x)
-> (forall x. Rep Automaton x -> Automaton) -> Generic Automaton
forall x. Rep Automaton x -> Automaton
forall x. Automaton -> Rep Automaton x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Automaton x -> Automaton
$cfrom :: forall x. Automaton -> Rep Automaton x
Generic, Int -> Automaton -> ShowS
[Automaton] -> ShowS
Automaton -> String
(Int -> Automaton -> ShowS)
-> (Automaton -> String)
-> ([Automaton] -> ShowS)
-> Show Automaton
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Automaton] -> ShowS
$cshowList :: [Automaton] -> ShowS
show :: Automaton -> String
$cshow :: Automaton -> String
showsPrec :: Int -> Automaton -> ShowS
$cshowsPrec :: Int -> Automaton -> ShowS
Show)
deriving anyclass (Automaton -> ()
(Automaton -> ()) -> NFData Automaton
forall a. (a -> ()) -> NFData a
rnf :: Automaton -> ()
$crnf :: Automaton -> ()
NFData)
instance Hashable Automaton where
hashWithSalt :: Int -> Automaton -> Int
hashWithSalt Int
salt (Automaton Hashed Text
pattern SuffixTable
_ BadCharTable
_) = Int -> Hashed Text -> Int
forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
salt Hashed Text
pattern
instance Eq Automaton where
(Automaton Hashed Text
pat1 SuffixTable
_ BadCharTable
_) == :: Automaton -> Automaton -> Bool
== (Automaton Hashed Text
pat2 SuffixTable
_ BadCharTable
_) = Hashed Text
pat1 Hashed Text -> Hashed Text -> Bool
forall a. Eq a => a -> a -> Bool
== Hashed Text
pat2
#if defined(HAS_AESON)
instance AE.FromJSON Automaton where
parseJSON :: Value -> Parser Automaton
parseJSON Value
v = Text -> Automaton
buildAutomaton (Text -> Automaton) -> Parser Text -> Parser Automaton
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Value -> Parser Text
forall a. FromJSON a => Value -> Parser a
AE.parseJSON Value
v
instance AE.ToJSON Automaton where
toJSON :: Automaton -> Value
toJSON = Text -> Value
forall a. ToJSON a => a -> Value
AE.toJSON (Text -> Value) -> (Automaton -> Text) -> Automaton -> Value
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Hashed Text -> Text
forall a. Hashed a -> a
unhashed (Hashed Text -> Text)
-> (Automaton -> Hashed Text) -> Automaton -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Automaton -> Hashed Text
automatonPattern
#endif
buildAutomaton :: Text -> Automaton
buildAutomaton :: Text -> Automaton
buildAutomaton Text
pattern = Hashed Text -> SuffixTable -> BadCharTable -> Automaton
Automaton (Text -> Hashed Text
forall a. Hashable a => a -> Hashed a
hashed Text
pattern) (Text -> SuffixTable
buildSuffixTable Text
pattern) (Text -> BadCharTable
buildBadCharTable Text
pattern)
runText :: forall a
. a
-> (a -> CodeUnitIndex -> Next a)
-> Automaton
-> Text
-> a
{-# INLINE runText #-}
runText :: a -> (a -> CodeUnitIndex -> Next a) -> Automaton -> Text -> a
runText a
seed a -> CodeUnitIndex -> Next a
f Automaton
automaton Text
text
| CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Eq a => a -> a -> Bool
== CodeUnitIndex
0 = a
seed
| Bool
otherwise = a -> CodeUnitIndex -> a
go a
seed (CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1)
where
Automaton Hashed Text
patternHashed SuffixTable
suffixTable BadCharTable
badCharTable = Automaton
automaton
needle :: Text
needle = Hashed Text -> Text
forall a. Hashed a -> a
unhashed Hashed Text
patternHashed
patLen :: CodeUnitIndex
patLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
needle
stringLen :: CodeUnitIndex
stringLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
text
codeUnitAt :: CodeUnitIndex -> CodeUnit
codeUnitAt = Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
text
{-# INLINE go #-}
go :: a -> CodeUnitIndex -> a
go a
result CodeUnitIndex
haystackIndex
| CodeUnitIndex
haystackIndex CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
stringLen = a -> CodeUnitIndex -> CodeUnitIndex -> a
matchLoop a
result CodeUnitIndex
haystackIndex (CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1)
| Bool
otherwise = a
result
matchLoop :: a -> CodeUnitIndex -> CodeUnitIndex -> a
matchLoop a
result CodeUnitIndex
haystackIndex CodeUnitIndex
needleIndex
| CodeUnitIndex
needleIndex CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
0 Bool -> Bool -> Bool
&& CodeUnitIndex -> CodeUnit
codeUnitAt CodeUnitIndex
haystackIndex CodeUnit -> CodeUnit -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
needle CodeUnitIndex
needleIndex =
a -> CodeUnitIndex -> CodeUnitIndex -> a
matchLoop a
result (CodeUnitIndex
haystackIndex CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1) (CodeUnitIndex
needleIndex CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1)
| CodeUnitIndex
needleIndex CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
0 =
case a -> CodeUnitIndex -> Next a
f a
result (CodeUnitIndex
haystackIndex CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
1) of
Done a
final -> a
final
Step a
intermediate -> a -> CodeUnitIndex -> a
go a
intermediate (CodeUnitIndex
haystackIndex CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
2 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
* CodeUnitIndex
patLen)
| Bool
otherwise =
let
badCharSkip :: CodeUnitIndex
badCharSkip = BadCharTable -> CodeUnit -> CodeUnitIndex
badCharLookup BadCharTable
badCharTable (CodeUnitIndex -> CodeUnit
codeUnitAt CodeUnitIndex
haystackIndex)
suffixSkip :: CodeUnitIndex
suffixSkip = SuffixTable -> CodeUnitIndex -> CodeUnitIndex
suffixLookup SuffixTable
suffixTable CodeUnitIndex
needleIndex
skip :: CodeUnitIndex
skip = CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Ord a => a -> a -> a
max CodeUnitIndex
badCharSkip CodeUnitIndex
suffixSkip
in
a -> CodeUnitIndex -> a
go a
result (CodeUnitIndex
haystackIndex CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
skip)
patternLength :: Automaton -> CodeUnitIndex
patternLength :: Automaton -> CodeUnitIndex
patternLength = Text -> CodeUnitIndex
Utf8.lengthUtf8 (Text -> CodeUnitIndex)
-> (Automaton -> Text) -> Automaton -> CodeUnitIndex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Automaton -> Text
patternText
patternText :: Automaton -> Text
patternText :: Automaton -> Text
patternText (Automaton Hashed Text
pattern SuffixTable
_ BadCharTable
_) = Hashed Text -> Text
forall a. Hashed a -> a
unhashed Hashed Text
pattern
newtype SuffixTable = SuffixTable (TypedByteArray Int)
deriving stock ((forall x. SuffixTable -> Rep SuffixTable x)
-> (forall x. Rep SuffixTable x -> SuffixTable)
-> Generic SuffixTable
forall x. Rep SuffixTable x -> SuffixTable
forall x. SuffixTable -> Rep SuffixTable x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep SuffixTable x -> SuffixTable
$cfrom :: forall x. SuffixTable -> Rep SuffixTable x
Generic, Int -> SuffixTable -> ShowS
[SuffixTable] -> ShowS
SuffixTable -> String
(Int -> SuffixTable -> ShowS)
-> (SuffixTable -> String)
-> ([SuffixTable] -> ShowS)
-> Show SuffixTable
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SuffixTable] -> ShowS
$cshowList :: [SuffixTable] -> ShowS
show :: SuffixTable -> String
$cshow :: SuffixTable -> String
showsPrec :: Int -> SuffixTable -> ShowS
$cshowsPrec :: Int -> SuffixTable -> ShowS
Show)
deriving anyclass (SuffixTable -> ()
(SuffixTable -> ()) -> NFData SuffixTable
forall a. (a -> ()) -> NFData a
rnf :: SuffixTable -> ()
$crnf :: SuffixTable -> ()
NFData)
suffixLookup :: SuffixTable -> CodeUnitIndex -> CodeUnitIndex
{-# INLINE suffixLookup #-}
suffixLookup :: SuffixTable -> CodeUnitIndex -> CodeUnitIndex
suffixLookup (SuffixTable TypedByteArray Int
table) = Int -> CodeUnitIndex
CodeUnitIndex (Int -> CodeUnitIndex)
-> (CodeUnitIndex -> Int) -> CodeUnitIndex -> CodeUnitIndex
forall b c a. (b -> c) -> (a -> b) -> a -> c
. TypedByteArray Int -> Int -> Int
forall a. Prim a => TypedByteArray a -> Int -> a
indexTable TypedByteArray Int
table (Int -> Int) -> (CodeUnitIndex -> Int) -> CodeUnitIndex -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. CodeUnitIndex -> Int
codeUnitIndex
buildSuffixTable :: Text -> SuffixTable
buildSuffixTable :: Text -> SuffixTable
buildSuffixTable Text
pattern = (forall s. ST s SuffixTable) -> SuffixTable
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s SuffixTable) -> SuffixTable)
-> (forall s. ST s SuffixTable) -> SuffixTable
forall a b. (a -> b) -> a -> b
$ do
let patLen :: CodeUnitIndex
patLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
pattern
MutableTypedByteArray Int s
table <- Int -> ST s (MutableTypedByteArray Int (PrimState (ST s)))
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
Int -> m (MutableTypedByteArray a (PrimState m))
TBA.newTypedByteArray (Int -> ST s (MutableTypedByteArray Int (PrimState (ST s))))
-> Int -> ST s (MutableTypedByteArray Int (PrimState (ST s)))
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex -> Int
codeUnitIndex CodeUnitIndex
patLen
let
init1 :: CodeUnitIndex -> CodeUnitIndex -> ST s ()
init1 CodeUnitIndex
lastPrefixIndex CodeUnitIndex
p
| CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
0 = do
let
prefixIndex :: CodeUnitIndex
prefixIndex
| Text -> CodeUnitIndex -> Bool
isPrefix Text
pattern (CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
1) = CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
1
| Bool
otherwise = CodeUnitIndex
lastPrefixIndex
MutableTypedByteArray Int (PrimState (ST s))
-> Int -> Int -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableTypedByteArray a (PrimState m) -> Int -> a -> m ()
TBA.writeTypedByteArray MutableTypedByteArray Int s
MutableTypedByteArray Int (PrimState (ST s))
table (CodeUnitIndex -> Int
codeUnitIndex CodeUnitIndex
p) (CodeUnitIndex -> Int
codeUnitIndex (CodeUnitIndex -> Int) -> CodeUnitIndex -> Int
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex
prefixIndex CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
p)
CodeUnitIndex -> CodeUnitIndex -> ST s ()
init1 CodeUnitIndex
prefixIndex (CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1)
| Bool
otherwise = () -> ST s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
init2 :: CodeUnitIndex -> ST s ()
init2 CodeUnitIndex
p
| CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 = do
let
suffixLen :: CodeUnitIndex
suffixLen = Text -> CodeUnitIndex -> CodeUnitIndex
suffixLength Text
pattern CodeUnitIndex
p
Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
pattern (CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
suffixLen) CodeUnit -> CodeUnit -> Bool
forall a. Eq a => a -> a -> Bool
/= Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
pattern (CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
suffixLen)) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$
MutableTypedByteArray Int (PrimState (ST s))
-> Int -> Int -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableTypedByteArray a (PrimState m) -> Int -> a -> m ()
TBA.writeTypedByteArray MutableTypedByteArray Int s
MutableTypedByteArray Int (PrimState (ST s))
table (CodeUnitIndex -> Int
codeUnitIndex (CodeUnitIndex -> Int) -> CodeUnitIndex -> Int
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
suffixLen) (CodeUnitIndex -> Int
codeUnitIndex (CodeUnitIndex -> Int) -> CodeUnitIndex -> Int
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
suffixLen)
CodeUnitIndex -> ST s ()
init2 (CodeUnitIndex
p CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
1)
| Bool
otherwise = () -> ST s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
CodeUnitIndex -> CodeUnitIndex -> ST s ()
init1 (CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1) (CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1)
CodeUnitIndex -> ST s ()
init2 CodeUnitIndex
0
TypedByteArray Int -> SuffixTable
SuffixTable (TypedByteArray Int -> SuffixTable)
-> ST s (TypedByteArray Int) -> ST s SuffixTable
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> MutableTypedByteArray Int (PrimState (ST s))
-> ST s (TypedByteArray Int)
forall (m :: * -> *) a.
PrimMonad m =>
MutableTypedByteArray a (PrimState m) -> m (TypedByteArray a)
TBA.unsafeFreezeTypedByteArray MutableTypedByteArray Int s
MutableTypedByteArray Int (PrimState (ST s))
table
data BadCharTable = BadCharTable
{ BadCharTable -> TypedByteArray Int
badCharTableEntries :: {-# UNPACK #-} !(TypedByteArray Int)
, BadCharTable -> CodeUnitIndex
badCharTablePatternLen :: CodeUnitIndex
}
deriving stock ((forall x. BadCharTable -> Rep BadCharTable x)
-> (forall x. Rep BadCharTable x -> BadCharTable)
-> Generic BadCharTable
forall x. Rep BadCharTable x -> BadCharTable
forall x. BadCharTable -> Rep BadCharTable x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep BadCharTable x -> BadCharTable
$cfrom :: forall x. BadCharTable -> Rep BadCharTable x
Generic, Int -> BadCharTable -> ShowS
[BadCharTable] -> ShowS
BadCharTable -> String
(Int -> BadCharTable -> ShowS)
-> (BadCharTable -> String)
-> ([BadCharTable] -> ShowS)
-> Show BadCharTable
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [BadCharTable] -> ShowS
$cshowList :: [BadCharTable] -> ShowS
show :: BadCharTable -> String
$cshow :: BadCharTable -> String
showsPrec :: Int -> BadCharTable -> ShowS
$cshowsPrec :: Int -> BadCharTable -> ShowS
Show)
deriving anyclass (BadCharTable -> ()
(BadCharTable -> ()) -> NFData BadCharTable
forall a. (a -> ()) -> NFData a
rnf :: BadCharTable -> ()
$crnf :: BadCharTable -> ()
NFData)
badcharTableSize :: Int
{-# INLINE badcharTableSize #-}
badcharTableSize :: Int
badcharTableSize = Int
256
badCharLookup :: BadCharTable -> CodeUnit -> CodeUnitIndex
{-# INLINE badCharLookup #-}
badCharLookup :: BadCharTable -> CodeUnit -> CodeUnitIndex
badCharLookup (BadCharTable TypedByteArray Int
asciiTable CodeUnitIndex
_patLen) CodeUnit
char = Int -> CodeUnitIndex
CodeUnitIndex (Int -> CodeUnitIndex) -> Int -> CodeUnitIndex
forall a b. (a -> b) -> a -> b
$ TypedByteArray Int -> Int -> Int
forall a. Prim a => TypedByteArray a -> Int -> a
indexTable TypedByteArray Int
asciiTable Int
intChar
where
intChar :: Int
intChar = CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
char
isPrefix :: Text -> CodeUnitIndex -> Bool
isPrefix :: Text -> CodeUnitIndex -> Bool
isPrefix Text
needle CodeUnitIndex
pos = CodeUnitIndex -> Bool
go CodeUnitIndex
0
where
suffixLen :: CodeUnitIndex
suffixLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
needle CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
pos
go :: CodeUnitIndex -> Bool
go CodeUnitIndex
i
| CodeUnitIndex
i CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
suffixLen =
if Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
needle CodeUnitIndex
i CodeUnit -> CodeUnit -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
needle (CodeUnitIndex
pos CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
i)
then CodeUnitIndex -> Bool
go (CodeUnitIndex
i CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
1)
else Bool
False
| Bool
otherwise = Bool
True
suffixLength :: Text -> CodeUnitIndex -> CodeUnitIndex
suffixLength :: Text -> CodeUnitIndex -> CodeUnitIndex
suffixLength Text
pattern CodeUnitIndex
pos = CodeUnitIndex -> CodeUnitIndex
go CodeUnitIndex
0
where
patLen :: CodeUnitIndex
patLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
pattern
go :: CodeUnitIndex -> CodeUnitIndex
go CodeUnitIndex
i
| Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
pattern (CodeUnitIndex
pos CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
i) CodeUnit -> CodeUnit -> Bool
forall a. Eq a => a -> a -> Bool
== Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
pattern (CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
i) Bool -> Bool -> Bool
&& CodeUnitIndex
i CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
pos = CodeUnitIndex -> CodeUnitIndex
go (CodeUnitIndex
i CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
1)
| Bool
otherwise = CodeUnitIndex
i
buildBadCharTable :: Text -> BadCharTable
buildBadCharTable :: Text -> BadCharTable
buildBadCharTable Text
pattern = (forall s. ST s BadCharTable) -> BadCharTable
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s BadCharTable) -> BadCharTable)
-> (forall s. ST s BadCharTable) -> BadCharTable
forall a b. (a -> b) -> a -> b
$ do
let patLen :: CodeUnitIndex
patLen = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
pattern
MutableTypedByteArray Int s
asciiTable <- Int -> Int -> ST s (MutableTypedByteArray Int (PrimState (ST s)))
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
Int -> a -> m (MutableTypedByteArray a (PrimState m))
TBA.replicate Int
badcharTableSize (Int -> ST s (MutableTypedByteArray Int (PrimState (ST s))))
-> Int -> ST s (MutableTypedByteArray Int (PrimState (ST s)))
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex -> Int
codeUnitIndex CodeUnitIndex
patLen
let
fillTable :: CodeUnitIndex -> ST s ()
fillTable !CodeUnitIndex
i
| CodeUnitIndex
i CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
< CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 = do
let patChar :: CodeUnit
patChar = Text -> CodeUnitIndex -> CodeUnit
Utf8.unsafeIndexCodeUnit Text
pattern CodeUnitIndex
i
MutableTypedByteArray Int (PrimState (ST s))
-> Int -> Int -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableTypedByteArray a (PrimState m) -> Int -> a -> m ()
TBA.writeTypedByteArray MutableTypedByteArray Int s
MutableTypedByteArray Int (PrimState (ST s))
asciiTable (CodeUnit -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CodeUnit
patChar) (CodeUnitIndex -> Int
codeUnitIndex (CodeUnitIndex -> Int) -> CodeUnitIndex -> Int
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex
patLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
1 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
i)
CodeUnitIndex -> ST s ()
fillTable (CodeUnitIndex
i CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
1)
| Bool
otherwise = () -> ST s ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
CodeUnitIndex -> ST s ()
fillTable CodeUnitIndex
0
TypedByteArray Int
asciiTableFrozen <- MutableTypedByteArray Int (PrimState (ST s))
-> ST s (TypedByteArray Int)
forall (m :: * -> *) a.
PrimMonad m =>
MutableTypedByteArray a (PrimState m) -> m (TypedByteArray a)
TBA.unsafeFreezeTypedByteArray MutableTypedByteArray Int s
MutableTypedByteArray Int (PrimState (ST s))
asciiTable
BadCharTable -> ST s BadCharTable
forall (f :: * -> *) a. Applicative f => a -> f a
pure BadCharTable :: TypedByteArray Int -> CodeUnitIndex -> BadCharTable
BadCharTable
{ badCharTableEntries :: TypedByteArray Int
badCharTableEntries = TypedByteArray Int
asciiTableFrozen
, badCharTablePatternLen :: CodeUnitIndex
badCharTablePatternLen = CodeUnitIndex
patLen
}
indexTable :: Prim a => TypedByteArray a -> Int -> a
{-# INLINE indexTable #-}
indexTable :: TypedByteArray a -> Int -> a
indexTable = TypedByteArray a -> Int -> a
forall a. Prim a => TypedByteArray a -> Int -> a
TBA.unsafeIndex