-- Alfred-Margaret: Fast Aho-Corasick string searching
-- Copyright 2019 Channable
--
-- Licensed under the 3-clause BSD license, see the LICENSE file in the
-- repository root.

{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE FlexibleInstances #-}

module Data.Text.BoyerMoore.Replacer
    ( -- Replacer
      replaceSingleLimited
    ) where

import Data.Text.Utf8 (Text)
import Data.Text.BoyerMoore.Automaton (Automaton, CodeUnitIndex)

import qualified Data.Text.Utf8 as Text
import qualified Data.Text.Utf8 as Utf8
import qualified Data.Text.BoyerMoore.Automaton as BoyerMoore

-- | Replace all occurrences matched by the Boyer-Moore automaton
-- with the given replacement text in some haystack.
-- Performs case-sensitive replacement.
replaceSingleLimited
  :: Automaton -- ^ Matches the needles
  -> Text -- ^ Replacement string
  -> Text -- ^ Haystack
  -> CodeUnitIndex -- ^ Maximum number of code units in the returned text
  -> Maybe Text
replaceSingleLimited :: Automaton -> Text -> Text -> CodeUnitIndex -> Maybe Text
replaceSingleLimited Automaton
needle Text
replacement Text
haystack CodeUnitIndex
maxLength
  | CodeUnitIndex
needleLength CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Eq a => a -> a -> Bool
== CodeUnitIndex
0 = Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ if CodeUnitIndex
haystackLength CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Eq a => a -> a -> Bool
== CodeUnitIndex
0 then Text
replacement else Text
haystack
  | Bool
otherwise = ReplaceState -> Maybe Text
finish (ReplaceState -> Maybe Text) -> ReplaceState -> Maybe Text
forall a b. (a -> b) -> a -> b
$ ReplaceState
-> (ReplaceState -> CodeUnitIndex -> Next ReplaceState)
-> Automaton
-> Text
-> ReplaceState
forall a.
a -> (a -> CodeUnitIndex -> Next a) -> Automaton -> Text -> a
BoyerMoore.runText ReplaceState
initial ReplaceState -> CodeUnitIndex -> Next ReplaceState
foundMatch Automaton
needle Text
haystack
  where
    needleLength :: CodeUnitIndex
needleLength = Automaton -> CodeUnitIndex
BoyerMoore.patternLength Automaton
needle
    haystackLength :: CodeUnitIndex
haystackLength = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
haystack
    replacementLength :: CodeUnitIndex
replacementLength = Text -> CodeUnitIndex
Utf8.lengthUtf8 Text
replacement

    initial :: ReplaceState
initial = ReplaceState :: [Text] -> CodeUnitIndex -> CodeUnitIndex -> ReplaceState
ReplaceState
      { rsChunks :: [Text]
rsChunks = []
      , rsPreviousMatchEnd :: CodeUnitIndex
rsPreviousMatchEnd = CodeUnitIndex
0
      , rsLength :: CodeUnitIndex
rsLength = CodeUnitIndex
0
      }

    foundMatch :: ReplaceState -> CodeUnitIndex -> Next ReplaceState
foundMatch ReplaceState
rs CodeUnitIndex
matchStart =
      let
        matchEnd :: CodeUnitIndex
matchEnd = CodeUnitIndex
matchStart CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
needleLength

        -- Slice the part of the haystack between the end of the previous match
        -- and the start of the current match
        haystackPartLength :: CodeUnitIndex
haystackPartLength = CodeUnitIndex
matchStart CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs
        haystackPart :: Text
haystackPart = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 (ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs) CodeUnitIndex
haystackPartLength Text
haystack

        -- Add the preceding part of the haystack and the replacement in reverse
        -- order to the chunk list (all chunks will be reversed at once in the final step).
        newChunks :: [Text]
newChunks = Text
replacement Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: Text
haystackPart Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: ReplaceState -> [Text]
rsChunks ReplaceState
rs
        newLength :: CodeUnitIndex
newLength = CodeUnitIndex
replacementLength CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
haystackPartLength CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ ReplaceState -> CodeUnitIndex
rsLength ReplaceState
rs

        newState :: ReplaceState
newState = ReplaceState :: [Text] -> CodeUnitIndex -> CodeUnitIndex -> ReplaceState
ReplaceState
          { rsChunks :: [Text]
rsChunks = [Text]
newChunks
          , rsPreviousMatchEnd :: CodeUnitIndex
rsPreviousMatchEnd = CodeUnitIndex
matchEnd
          , rsLength :: CodeUnitIndex
rsLength = CodeUnitIndex
newLength
          }
      in
        if CodeUnitIndex
newLength CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
> CodeUnitIndex
maxLength
          then ReplaceState -> Next ReplaceState
forall a. a -> Next a
BoyerMoore.Done ReplaceState
newState
          else ReplaceState -> Next ReplaceState
forall a. a -> Next a
BoyerMoore.Step ReplaceState
newState

    finish :: ReplaceState -> Maybe Text
finish ReplaceState
rs =
      let
        -- Slice the remaining part of the haystack from the end of the last match
        -- to the end of the haystack.
        haystackPartLength :: CodeUnitIndex
haystackPartLength = CodeUnitIndex
haystackLength CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
- ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs
        finalChunks :: [Text]
finalChunks
            = CodeUnitIndex -> CodeUnitIndex -> Text -> Text
Utf8.unsafeSliceUtf8 (ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd ReplaceState
rs) CodeUnitIndex
haystackPartLength Text
haystack
            Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: ReplaceState -> [Text]
rsChunks ReplaceState
rs
        finalLength :: CodeUnitIndex
finalLength = ReplaceState -> CodeUnitIndex
rsLength ReplaceState
rs CodeUnitIndex -> CodeUnitIndex -> CodeUnitIndex
forall a. Num a => a -> a -> a
+ CodeUnitIndex
haystackPartLength
      in
        if CodeUnitIndex
finalLength CodeUnitIndex -> CodeUnitIndex -> Bool
forall a. Ord a => a -> a -> Bool
> CodeUnitIndex
maxLength
          then Maybe Text
forall a. Maybe a
Nothing
          else Text -> Maybe Text
forall a. a -> Maybe a
Just (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Text.concat ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [Text] -> [Text]
forall a. [a] -> [a]
reverse [Text]
finalChunks

-- | Internal accumulator state for performing a replace while stepping an automaton
data ReplaceState = ReplaceState
  { ReplaceState -> [Text]
rsChunks :: [Text]
    -- ^ Chunks of the final text, in reverse order so that we can efficiently prepend
  , ReplaceState -> CodeUnitIndex
rsPreviousMatchEnd :: !CodeUnitIndex
    -- ^ Index one past the end of the last match.
  , ReplaceState -> CodeUnitIndex
rsLength :: !CodeUnitIndex
    -- ^ Length of the newly build string so far, measured in CodeUnits
  }