-- Alfred-Margaret: Fast Aho-Corasick string searching
-- Copyright 2019 Channable
--
-- Licensed under the 3-clause BSD license, see the LICENSE file in the
-- repository root.

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE DerivingStrategies #-}

-- | Implements sequential string replacements based on the Aho-Corasick algorithm.
module Data.Text.AhoCorasick.Replacer
    ( -- * State machine
      Needle
    , Payload (..)
    , Replacement
    , Replacer (..)
    , build
    , compose
    , run
    , runWithLimit
    ) where

import Control.DeepSeq (NFData)
import Data.Hashable (Hashable)
import Data.List (sort)
import Data.Maybe (fromJust)
import GHC.Generics (Generic)

#if defined(HAS_AESON)
import qualified Data.Aeson as AE
#endif

import Data.Text.CaseSensitivity (CaseSensitivity (..))
import Data.Text.Utf8 (CodeUnitIndex, Text)
import Data.Text.AhoCorasick.Searcher (Searcher)

import qualified Data.Text.Utf8 as Utf8
import qualified Data.Text.AhoCorasick.Automaton as Aho
import qualified Data.Text.AhoCorasick.Searcher as Searcher

-- | Descriptive type alias for strings to search for.
type Needle = Text

-- | Descriptive type alias for replacements.
type Replacement = Text

-- | Priority of a needle. Higher integers indicate higher priorities.
-- Replacement order is such that all matches of priority p are replaced before
-- replacing any matches of priority q where p > q.
type Priority = Int

data Payload = Payload
  { Payload -> Priority
needlePriority    :: {-# UNPACK #-} !Priority
  , Payload -> CodeUnitIndex
needleLength      :: {-# UNPACK #-} !CodeUnitIndex
  , Payload -> Replacement
needleReplacement :: !Replacement
  }
#if defined(HAS_AESON)
  deriving (Payload -> Payload -> Bool
(Payload -> Payload -> Bool)
-> (Payload -> Payload -> Bool) -> Eq Payload
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Payload -> Payload -> Bool
$c/= :: Payload -> Payload -> Bool
== :: Payload -> Payload -> Bool
$c== :: Payload -> Payload -> Bool
Eq, (forall x. Payload -> Rep Payload x)
-> (forall x. Rep Payload x -> Payload) -> Generic Payload
forall x. Rep Payload x -> Payload
forall x. Payload -> Rep Payload x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Payload x -> Payload
$cfrom :: forall x. Payload -> Rep Payload x
Generic, Eq Payload
Eq Payload
-> (Priority -> Payload -> Priority)
-> (Payload -> Priority)
-> Hashable Payload
Priority -> Payload -> Priority
Payload -> Priority
forall a.
Eq a
-> (Priority -> a -> Priority) -> (a -> Priority) -> Hashable a
hash :: Payload -> Priority
$chash :: Payload -> Priority
hashWithSalt :: Priority -> Payload -> Priority
$chashWithSalt :: Priority -> Payload -> Priority
$cp1Hashable :: Eq Payload
Hashable, Payload -> ()
(Payload -> ()) -> NFData Payload
forall a. (a -> ()) -> NFData a
rnf :: Payload -> ()
$crnf :: Payload -> ()
NFData, Priority -> Payload -> ShowS
[Payload] -> ShowS
Payload -> String
(Priority -> Payload -> ShowS)
-> (Payload -> String) -> ([Payload] -> ShowS) -> Show Payload
forall a.
(Priority -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Payload] -> ShowS
$cshowList :: [Payload] -> ShowS
show :: Payload -> String
$cshow :: Payload -> String
showsPrec :: Priority -> Payload -> ShowS
$cshowsPrec :: Priority -> Payload -> ShowS
Show, Value -> Parser [Payload]
Value -> Parser Payload
(Value -> Parser Payload)
-> (Value -> Parser [Payload]) -> FromJSON Payload
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [Payload]
$cparseJSONList :: Value -> Parser [Payload]
parseJSON :: Value -> Parser Payload
$cparseJSON :: Value -> Parser Payload
AE.FromJSON, [Payload] -> Encoding
[Payload] -> Value
Payload -> Encoding
Payload -> Value
(Payload -> Value)
-> (Payload -> Encoding)
-> ([Payload] -> Value)
-> ([Payload] -> Encoding)
-> ToJSON Payload
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [Payload] -> Encoding
$ctoEncodingList :: [Payload] -> Encoding
toJSONList :: [Payload] -> Value
$ctoJSONList :: [Payload] -> Value
toEncoding :: Payload -> Encoding
$ctoEncoding :: Payload -> Encoding
toJSON :: Payload -> Value
$ctoJSON :: Payload -> Value
AE.ToJSON)
#else
  deriving (Eq, Generic, Hashable, NFData, Show)
#endif

-- | A state machine used for efficient replacements with many different needles.
data Replacer = Replacer
  { Replacer -> CaseSensitivity
replacerCaseSensitivity :: CaseSensitivity
  , Replacer -> Searcher Payload
replacerSearcher :: Searcher Payload
  }
  deriving stock (Priority -> Replacer -> ShowS
[Replacer] -> ShowS
Replacer -> String
(Priority -> Replacer -> ShowS)
-> (Replacer -> String) -> ([Replacer] -> ShowS) -> Show Replacer
forall a.
(Priority -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Replacer] -> ShowS
$cshowList :: [Replacer] -> ShowS
show :: Replacer -> String
$cshow :: Replacer -> String
showsPrec :: Priority -> Replacer -> ShowS
$cshowsPrec :: Priority -> Replacer -> ShowS
Show, Replacer -> Replacer -> Bool
(Replacer -> Replacer -> Bool)
-> (Replacer -> Replacer -> Bool) -> Eq Replacer
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Replacer -> Replacer -> Bool
$c/= :: Replacer -> Replacer -> Bool
== :: Replacer -> Replacer -> Bool
$c== :: Replacer -> Replacer -> Bool
Eq, (forall x. Replacer -> Rep Replacer x)
-> (forall x. Rep Replacer x -> Replacer) -> Generic Replacer
forall x. Rep Replacer x -> Replacer
forall x. Replacer -> Rep Replacer x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Replacer x -> Replacer
$cfrom :: forall x. Replacer -> Rep Replacer x
Generic)
#if defined(HAS_AESON)
  deriving (Eq Replacer
Eq Replacer
-> (Priority -> Replacer -> Priority)
-> (Replacer -> Priority)
-> Hashable Replacer
Priority -> Replacer -> Priority
Replacer -> Priority
forall a.
Eq a
-> (Priority -> a -> Priority) -> (a -> Priority) -> Hashable a
hash :: Replacer -> Priority
$chash :: Replacer -> Priority
hashWithSalt :: Priority -> Replacer -> Priority
$chashWithSalt :: Priority -> Replacer -> Priority
$cp1Hashable :: Eq Replacer
Hashable, Replacer -> ()
(Replacer -> ()) -> NFData Replacer
forall a. (a -> ()) -> NFData a
rnf :: Replacer -> ()
$crnf :: Replacer -> ()
NFData, Value -> Parser [Replacer]
Value -> Parser Replacer
(Value -> Parser Replacer)
-> (Value -> Parser [Replacer]) -> FromJSON Replacer
forall a.
(Value -> Parser a) -> (Value -> Parser [a]) -> FromJSON a
parseJSONList :: Value -> Parser [Replacer]
$cparseJSONList :: Value -> Parser [Replacer]
parseJSON :: Value -> Parser Replacer
$cparseJSON :: Value -> Parser Replacer
AE.FromJSON, [Replacer] -> Encoding
[Replacer] -> Value
Replacer -> Encoding
Replacer -> Value
(Replacer -> Value)
-> (Replacer -> Encoding)
-> ([Replacer] -> Value)
-> ([Replacer] -> Encoding)
-> ToJSON Replacer
forall a.
(a -> Value)
-> (a -> Encoding)
-> ([a] -> Value)
-> ([a] -> Encoding)
-> ToJSON a
toEncodingList :: [Replacer] -> Encoding
$ctoEncodingList :: [Replacer] -> Encoding
toJSONList :: [Replacer] -> Value
$ctoJSONList :: [Replacer] -> Value
toEncoding :: Replacer -> Encoding
$ctoEncoding :: Replacer -> Encoding
toJSON :: Replacer -> Value
$ctoJSON :: Replacer -> Value
AE.ToJSON)
#else
  deriving (Hashable, NFData)
#endif

-- | Build an Aho-Corasick automaton that can be used for performing fast
-- sequential replaces.
--
-- Case-insensitive matching performs per-letter language-agnostic lower-casing.
-- Therefore, it will work in most cases, but not in languages where lower-casing
-- depends on the context of the character in question.
--
-- We need to revisit this algorithm when we want to implement full Unicode
-- support.
build :: CaseSensitivity -> [(Needle, Replacement)] -> Replacer
build :: CaseSensitivity -> [(Replacement, Replacement)] -> Replacer
build CaseSensitivity
caseSensitivity [(Replacement, Replacement)]
replaces = CaseSensitivity -> Searcher Payload -> Replacer
Replacer CaseSensitivity
caseSensitivity Searcher Payload
searcher
  where
    searcher :: Searcher Payload
searcher = CaseSensitivity -> [(Replacement, Payload)] -> Searcher Payload
forall v.
Hashable v =>
CaseSensitivity -> [(Replacement, v)] -> Searcher v
Searcher.buildWithValues CaseSensitivity
caseSensitivity ([(Replacement, Payload)] -> Searcher Payload)
-> [(Replacement, Payload)] -> Searcher Payload
forall a b. (a -> b) -> a -> b
$ (Priority -> (Replacement, Replacement) -> (Replacement, Payload))
-> [Priority]
-> [(Replacement, Replacement)]
-> [(Replacement, Payload)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Priority -> (Replacement, Replacement) -> (Replacement, Payload)
mapNeedle [Priority
0..] [(Replacement, Replacement)]
replaces
    mapNeedle :: Priority -> (Replacement, Replacement) -> (Replacement, Payload)
mapNeedle Priority
i (Replacement
needle, Replacement
replacement) =
      let
        needle' :: Replacement
needle' = case CaseSensitivity
caseSensitivity of
          CaseSensitivity
CaseSensitive -> Replacement
needle
          CaseSensitivity
IgnoreCase -> Replacement -> Replacement
Utf8.lowerUtf8 Replacement
needle
      in
        -- Note that we negate i: earlier needles have a higher priority. We
        -- could avoid it and define larger integers to be lower priority, but
        -- that made the terminology in this module very confusing.
        (Replacement
needle', Priority -> CodeUnitIndex -> Replacement -> Payload
Payload (-Priority
i) (Replacement -> CodeUnitIndex
Utf8.lengthUtf8 Replacement
needle') Replacement
replacement)

-- | Return the composition `replacer2` after `replacer1`, if they have the same
-- case sensitivity. If the case sensitivity differs, Nothing is returned.
compose :: Replacer -> Replacer -> Maybe Replacer
compose :: Replacer -> Replacer -> Maybe Replacer
compose (Replacer CaseSensitivity
case1 Searcher Payload
searcher1) (Replacer CaseSensitivity
case2 Searcher Payload
searcher2)
  | CaseSensitivity
case1 CaseSensitivity -> CaseSensitivity -> Bool
forall a. Eq a => a -> a -> Bool
/= CaseSensitivity
case2 = Maybe Replacer
forall a. Maybe a
Nothing
  | Bool
otherwise =
      let
        -- Replace the priorities of the second machine, so they all come after
        -- the first.
        renumber :: Priority -> (a, Payload) -> (a, Payload)
renumber Priority
i (a
needle, Payload Priority
_ CodeUnitIndex
len Replacement
replacement) = (a
needle, Priority -> CodeUnitIndex -> Replacement -> Payload
Payload (-Priority
i) CodeUnitIndex
len Replacement
replacement)
        needles1 :: [(Replacement, Payload)]
needles1 = Searcher Payload -> [(Replacement, Payload)]
forall v. Searcher v -> [(Replacement, v)]
Searcher.needles Searcher Payload
searcher1
        needles2 :: [(Replacement, Payload)]
needles2 = Searcher Payload -> [(Replacement, Payload)]
forall v. Searcher v -> [(Replacement, v)]
Searcher.needles Searcher Payload
searcher2
        searcher :: Searcher Payload
searcher = CaseSensitivity -> [(Replacement, Payload)] -> Searcher Payload
forall v.
Hashable v =>
CaseSensitivity -> [(Replacement, v)] -> Searcher v
Searcher.buildWithValues CaseSensitivity
case1 ([(Replacement, Payload)] -> Searcher Payload)
-> [(Replacement, Payload)] -> Searcher Payload
forall a b. (a -> b) -> a -> b
$ (Priority -> (Replacement, Payload) -> (Replacement, Payload))
-> [Priority]
-> [(Replacement, Payload)]
-> [(Replacement, Payload)]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Priority -> (Replacement, Payload) -> (Replacement, Payload)
forall a. Priority -> (a, Payload) -> (a, Payload)
renumber [Priority
0..] ([(Replacement, Payload)]
needles1 [(Replacement, Payload)]
-> [(Replacement, Payload)] -> [(Replacement, Payload)]
forall a. [a] -> [a] -> [a]
++ [(Replacement, Payload)]
needles2)
      in
        Replacer -> Maybe Replacer
forall a. a -> Maybe a
Just (Replacer -> Maybe Replacer) -> Replacer -> Maybe Replacer
forall a b. (a -> b) -> a -> b
$ CaseSensitivity -> Searcher Payload -> Replacer
Replacer CaseSensitivity
case1 Searcher Payload
searcher

-- A match collected while running replacements. It is isomorphic to the Match
-- reported by the automaton, but the data is arranged in a more useful way:
-- as the start index and length of the match, and the replacement.
data Match = Match !CodeUnitIndex !CodeUnitIndex !Text deriving (Match -> Match -> Bool
(Match -> Match -> Bool) -> (Match -> Match -> Bool) -> Eq Match
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Match -> Match -> Bool
$c/= :: Match -> Match -> Bool
== :: Match -> Match -> Bool
$c== :: Match -> Match -> Bool
Eq, Eq Match
Eq Match
-> (Match -> Match -> Ordering)
-> (Match -> Match -> Bool)
-> (Match -> Match -> Bool)
-> (Match -> Match -> Bool)
-> (Match -> Match -> Bool)
-> (Match -> Match -> Match)
-> (Match -> Match -> Match)
-> Ord Match
Match -> Match -> Bool
Match -> Match -> Ordering
Match -> Match -> Match
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Match -> Match -> Match
$cmin :: Match -> Match -> Match
max :: Match -> Match -> Match
$cmax :: Match -> Match -> Match
>= :: Match -> Match -> Bool
$c>= :: Match -> Match -> Bool
> :: Match -> Match -> Bool
$c> :: Match -> Match -> Bool
<= :: Match -> Match -> Bool
$c<= :: Match -> Match -> Bool
< :: Match -> Match -> Bool
$c< :: Match -> Match -> Bool
compare :: Match -> Match -> Ordering
$ccompare :: Match -> Match -> Ordering
$cp1Ord :: Eq Match
Ord, Priority -> Match -> ShowS
[Match] -> ShowS
Match -> String
(Priority -> Match -> ShowS)
-> (Match -> String) -> ([Match] -> ShowS) -> Show Match
forall a.
(Priority -> a -> ShowS)
-> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Match] -> ShowS
$cshowList :: [Match] -> ShowS
show :: Match -> String
$cshow :: Match -> String
showsPrec :: Priority -> Match -> ShowS
$cshowsPrec :: Priority -> Match -> ShowS
Show)

-- | Apply replacements of all matches. Assumes that the matches are ordered by
-- match position, and that no matches overlap.
replace :: [Match] -> Text -> Text
replace :: [Match] -> Replacement -> Replacement
replace [Match]
matches Replacement
haystack = [Replacement] -> Replacement
Utf8.concat ([Replacement] -> Replacement) -> [Replacement] -> Replacement
forall a b. (a -> b) -> a -> b
$ CodeUnitIndex -> [Match] -> Replacement -> [Replacement]
go CodeUnitIndex
0 [Match]
matches Replacement
haystack
  where
    -- At every match, cut the string into three pieces, removing the match.
    -- Because a Text is a buffer pointer and (offset, length), cutting does not
    -- involve string copies. Only at the very end we piece together the strings
    -- again, so Text can allocate a buffer of the right length and memcpy the
    -- parts into the new target string.
    -- If `k` is a code unit index into the original text, then `k - offset`
    -- is an index into `remainder`. In other words, `offset` is the index into
    -- the original text where `remainder` starts.
    go :: CodeUnitIndex -> [Match] -> Text -> [Text]
    go :: CodeUnitIndex -> [Match] -> Replacement -> [Replacement]
go !CodeUnitIndex
_offset [] Replacement
remainder = [Replacement
remainder]
    go !CodeUnitIndex
offset ((Match CodeUnitIndex
pos CodeUnitIndex
len Replacement
replacement) : [Match]
ms) Replacement
remainder =
      let
        (Replacement
prefix, Replacement
suffix) = CodeUnitIndex
-> CodeUnitIndex -> Replacement -> (Replacement, Replacement)
Utf8.unsafeCutUtf8 (CodeUnitIndex
pos CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
offset) CodeUnitIndex
len Replacement
remainder
      in
        Replacement
prefix Replacement -> [Replacement] -> [Replacement]
forall a. a -> [a] -> [a]
: Replacement
replacement Replacement -> [Replacement] -> [Replacement]
forall a. a -> [a] -> [a]
: CodeUnitIndex -> [Match] -> Replacement -> [Replacement]
go (CodeUnitIndex
pos CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
len) [Match]
ms Replacement
suffix

-- | Compute the length of the string resulting from applying the replacements.
replacementLength :: [Match] -> Text -> CodeUnitIndex
replacementLength :: [Match] -> Replacement -> CodeUnitIndex
replacementLength [Match]
matches Replacement
initial  = [Match] -> CodeUnitIndex -> CodeUnitIndex
go [Match]
matches (Replacement -> CodeUnitIndex
Utf8.lengthUtf8 Replacement
initial)
  where
    go :: [Match] -> CodeUnitIndex -> CodeUnitIndex
go [] !CodeUnitIndex
acc = CodeUnitIndex
acc
    go (Match CodeUnitIndex
_ CodeUnitIndex
matchLen Replacement
repl : [Match]
rest) !CodeUnitIndex
acc = [Match] -> CodeUnitIndex -> CodeUnitIndex
go [Match]
rest (CodeUnitIndex
acc CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
matchLen CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ Replacement -> CodeUnitIndex
Utf8.lengthUtf8 Replacement
repl)

-- | Given a list of matches sorted on start position, remove matches that start
-- within an earlier match.
removeOverlap :: [Match] -> [Match]
removeOverlap :: [Match] -> [Match]
removeOverlap [Match]
matches = case [Match]
matches of
  [] -> []
  [Match
m] -> [Match
m]
  (m0 :: Match
m0@(Match CodeUnitIndex
pos0 CodeUnitIndex
len0 Replacement
_) : m1 :: Match
m1@(Match CodeUnitIndex
pos1 CodeUnitIndex
_ Replacement
_) : [Match]
ms) ->
    if CodeUnitIndex
pos1 CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
>= CodeUnitIndex
pos0 CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
len0
      then Match
m0 Match -> [Match] -> [Match]
forall a. a -> [a] -> [a]
: [Match] -> [Match]
removeOverlap (Match
m1Match -> [Match] -> [Match]
forall a. a -> [a] -> [a]
:[Match]
ms)
      else [Match] -> [Match]
removeOverlap (Match
m0Match -> [Match] -> [Match]
forall a. a -> [a] -> [a]
:[Match]
ms)

-- | When we iterate through all matches, keep track only of the matches with
-- the highest priority: those are the ones that we will replace first. If we
-- find multiple matches with that priority, remember all of them. If we find a
-- match with lower priority, ignore it, because we already have a more
-- important match. Also, if the priority is `threshold` or higher, ignore the
-- match, so we can exclude matches if we already did a round of replacements
-- for that priority. This way we don't have to build a new automaton after
-- every round of replacements.
{-# INLINE prependMatch #-}
prependMatch :: Priority -> (Priority, [Match]) -> Aho.Match Payload -> Aho.Next (Priority, [Match])
prependMatch :: Priority
-> (Priority, [Match]) -> Match Payload -> Next (Priority, [Match])
prependMatch !Priority
threshold (!Priority
pBest, ![Match]
matches) (Aho.Match CodeUnitIndex
pos (Payload Priority
pMatch CodeUnitIndex
len Replacement
replacement))
  | Priority
pMatch Priority -> Priority -> Bool
forall a. Ord a => a -> a -> Bool
< Priority
threshold Bool -> Bool -> Bool
&& Priority
pMatch Priority -> Priority -> Bool
forall a. Ord a => a -> a -> Bool
>  Priority
pBest = (Priority, [Match]) -> Next (Priority, [Match])
forall a. a -> Next a
Aho.Step (Priority
pMatch, [CodeUnitIndex -> CodeUnitIndex -> Replacement -> Match
Match (CodeUnitIndex
pos CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
len) CodeUnitIndex
len Replacement
replacement])
  | Priority
pMatch Priority -> Priority -> Bool
forall a. Ord a => a -> a -> Bool
< Priority
threshold Bool -> Bool -> Bool
&& Priority
pMatch Priority -> Priority -> Bool
forall a. Eq a => a -> a -> Bool
== Priority
pBest = (Priority, [Match]) -> Next (Priority, [Match])
forall a. a -> Next a
Aho.Step (Priority
pMatch, CodeUnitIndex -> CodeUnitIndex -> Replacement -> Match
Match (CodeUnitIndex
pos CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- CodeUnitIndex
len) CodeUnitIndex
len Replacement
replacement Match -> [Match] -> [Match]
forall a. a -> [a] -> [a]
: [Match]
matches)
  | Bool
otherwise = (Priority, [Match]) -> Next (Priority, [Match])
forall a. a -> Next a
Aho.Step (Priority
pBest, [Match]
matches)

run :: Replacer -> Text -> Text
run :: Replacer -> Replacement -> Replacement
run Replacer
replacer = Maybe Replacement -> Replacement
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Replacement -> Replacement)
-> (Replacement -> Maybe Replacement) -> Replacement -> Replacement
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Replacer -> CodeUnitIndex -> Replacement -> Maybe Replacement
runWithLimit Replacer
replacer CodeUnitIndex
forall a. Bounded a => a
maxBound

{-# NOINLINE runWithLimit #-}
runWithLimit :: Replacer -> CodeUnitIndex -> Text -> Maybe Text
runWithLimit :: Replacer -> CodeUnitIndex -> Replacement -> Maybe Replacement
runWithLimit (Replacer CaseSensitivity
case_ Searcher Payload
searcher) CodeUnitIndex
maxLength = Priority -> Replacement -> Maybe Replacement
go Priority
initialThreshold
  where
    !automaton :: AcMachine Payload
automaton = Searcher Payload -> AcMachine Payload
forall v. Searcher v -> AcMachine v
Searcher.automaton Searcher Payload
searcher

    -- Priorities are 0 or lower, so an initial threshold of 1 keeps all
    -- matches.
    !initialThreshold :: Priority
initialThreshold = Priority
1

    -- Needle priorities go from 0 for the highest priority to (-numNeedles + 1)
    -- for the lowest priority. That means that if we find a match with
    -- minPriority, we don't need to do another pass afterwards, because there
    -- are no remaining needles.
    !minPriority :: Priority
minPriority = Priority
1 Priority -> Priority -> Priority
forall a. Num a => a -> a -> a
- Searcher Payload -> Priority
forall v. Searcher v -> Priority
Searcher.numNeedles Searcher Payload
searcher

    go :: Priority -> Text -> Maybe Text
    go :: Priority -> Replacement -> Maybe Replacement
go !Priority
threshold Replacement
haystack =
      let
        seed :: (Priority, [a])
seed = (Priority
forall a. Bounded a => a
minBound :: Priority, [])
        matchesWithPriority :: (Priority, [Match])
matchesWithPriority = case CaseSensitivity
case_ of
          CaseSensitivity
CaseSensitive -> (Priority, [Match])
-> ((Priority, [Match])
    -> Match Payload -> Next (Priority, [Match]))
-> AcMachine Payload
-> Replacement
-> (Priority, [Match])
forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Replacement -> a
Aho.runText (Priority, [Match])
forall a. (Priority, [a])
seed (Priority
-> (Priority, [Match]) -> Match Payload -> Next (Priority, [Match])
prependMatch Priority
threshold) AcMachine Payload
automaton Replacement
haystack
          CaseSensitivity
IgnoreCase -> (Priority, [Match])
-> ((Priority, [Match])
    -> Match Payload -> Next (Priority, [Match]))
-> AcMachine Payload
-> Replacement
-> (Priority, [Match])
forall a v.
a -> (a -> Match v -> Next a) -> AcMachine v -> Replacement -> a
Aho.runLower (Priority, [Match])
forall a. (Priority, [a])
seed (Priority
-> (Priority, [Match]) -> Match Payload -> Next (Priority, [Match])
prependMatch Priority
threshold) AcMachine Payload
automaton Replacement
haystack
      in
        case (Priority, [Match])
matchesWithPriority of
          -- No match at the given threshold, there is nothing left to do.
          -- Return the input string unmodified.
          (Priority
_, []) -> Replacement -> Maybe Replacement
forall a. a -> Maybe a
Just Replacement
haystack
          -- We found matches at priority p. Remove overlapping matches, then
          -- apply all replacements. Next, we need to go again, this time
          -- considering only needles with a lower priority than p. As an
          -- optimization (which matters mainly for the single needle case),
          -- if we find a match at the lowest priority, we don't need another
          -- pass. Note that if in `rawMatches` we find only matches of priority
          -- p > minPriority, then we do still need another pass, because the
          -- replacements could create new matches.
          (Priority
p, [Match]
matches)
            | [Match] -> Replacement -> CodeUnitIndex
replacementLength [Match]
matches Replacement
haystack CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
> CodeUnitIndex
maxLength -> Maybe Replacement
forall a. Maybe a
Nothing
            | Priority
p Priority -> Priority -> Bool
forall a. Eq a => a -> a -> Bool
== Priority
minPriority -> Replacement -> Maybe Replacement
forall a. a -> Maybe a
Just (Replacement -> Maybe Replacement)
-> Replacement -> Maybe Replacement
forall a b. (a -> b) -> a -> b
$ [Match] -> Replacement -> Replacement
replace ([Match] -> [Match]
removeOverlap ([Match] -> [Match]) -> [Match] -> [Match]
forall a b. (a -> b) -> a -> b
$ [Match] -> [Match]
forall a. Ord a => [a] -> [a]
sort [Match]
matches) Replacement
haystack
            | Bool
otherwise -> Priority -> Replacement -> Maybe Replacement
go Priority
p (Replacement -> Maybe Replacement)
-> Replacement -> Maybe Replacement
forall a b. (a -> b) -> a -> b
$ [Match] -> Replacement -> Replacement
replace ([Match] -> [Match]
removeOverlap ([Match] -> [Match]) -> [Match] -> [Match]
forall a b. (a -> b) -> a -> b
$ [Match] -> [Match]
forall a. Ord a => [a] -> [a]
sort [Match]
matches) Replacement
haystack