Data/ByteString/Search/Internal/BoyerMoore.hs

{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide, prune #-}
-- |
-- Module         : Data.ByteString.Search.Internal.BoyerMoore
-- Copyright      : Daniel Fischer
--                  Chris Kuklewicz
-- Licence        : BSD3
-- Maintainer     : Daniel Fischer <daniel.is.fischer@web.de>
-- Stability      : Provisional
-- Portability    : non-portable (BangPatterns)
--
-- Fast overlapping Boyer-Moore search of both strict and lazy
-- 'S.ByteString' values. Breaking, splitting and replacing
-- using the Boyer-Moore algorithm.
--
-- Descriptions of the algorithm can be found at
-- <http://www-igm.univ-mlv.fr/~lecroq/string/node14.html#SECTION00140>
-- and
-- <http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm>
--
-- Original authors: Daniel Fischer (daniel.is.fischer at web.de) and
-- Chris Kuklewicz (haskell at list.mightyreason.com).

module Data.ByteString.Search.Internal.BoyerMoore (
                                           matchLL
                                         , matchLS
                                         , matchSL
                                         , matchSS

                                           --  Non-overlapping
                                         , matchNOL
                                         , matchNOS

                                            --  Replacing substrings
                                            -- replacing
                                         , replaceAllS
                                         , replaceAllL
                                            --  Breaking on substrings
                                            -- breaking
                                         , breakSubstringS
                                         , breakAfterS
                                         , breakSubstringL
                                         , breakAfterL
                                         , breakFindAfterL
                                            --  Splitting on substrings
                                            -- splitting
                                         , splitKeepEndS
                                         , splitKeepFrontS
                                         , splitDropS
                                         , splitKeepEndL
                                         , splitKeepFrontL
                                         , splitDropL
                                         ) where


import Data.ByteString.Search.Internal.Utils
                (ldrop, lsplit, keep, release, strictify)
import Data.ByteString.Search.Substitution

import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Internal as LI
import Data.ByteString.Unsafe (unsafeIndex)

import Data.Array.Base (unsafeRead, unsafeWrite, unsafeAt)
import Data.Array.ST
import Data.Array.Unboxed

import Data.Word (Word8)
import Data.Int (Int64)

-- overview
--
-- This module exports four search functions, one for each combination of
-- strict and lazy ByteStrings as pattern and target. The common base name
-- is @match@, the two-letter suffix indicates the types of the pattern
-- (first letter of the suffix) and target (second letter). These functions
-- return (for a non-empty pattern) a list of all the indices of the target
-- string where an occurrence of the pattern begins, if some occurrences
-- overlap, all starting indices are reported. The list is produced lazily,
-- so not necessarily the entire target string is searched.
--
-- The behaviour of these functions when given an empty pattern has changed.
-- Formerly, the @matchXY@ functions returned an empty list then, now it's
-- @[0 .. 'length' target]@.
--
-- Newly added are functions to replace all (non-overlapping) occurrences
-- of a pattern within a string, functions to break ByteStrings at the first
-- occurrence of a pattern and functions to split ByteStrings at each
-- occurrence of a pattern. None of these functions does copying, so they
-- don't introduce large memory overhead.
--
-- Internally, a lazy pattern is always converted to a strict ByteString,
-- which is necessary for an efficient implementation of the algorithm.
-- The limit this imposes on the length of the pattern is probably
-- irrelevant in practice, but perhaps it should be mentioned.
-- This also means that the @matchL*@ functions are mere convenience wrappers.
-- Except for the initial 'strictify'ing, there's no difference between lazy
-- and strict patterns, they call the same workers. There is, however, a
-- difference between strict and lazy target strings.
-- For the new functions, no such wrappers are provided, you have to
-- 'strictify' lazy patterns yourself.

-- caution
--
-- When working with a lazy target string, the relation between the pattern
-- length and the chunk size can play a big r&#244;le.
-- Crossing chunk boundaries is relatively expensive, so when that becomes
-- a frequent occurrence, as may happen when the pattern length is close
-- to or larger than the chunk size, performance is likely to degrade.
-- If it is needed, steps can be taken to ameliorate that effect, but unless
-- entirely separate functions are introduced, that would hurt the
-- performance for the more common case of patterns much shorter than
-- the default chunk size.

-- performance
--
-- In general, the Boyer-Moore algorithm is the most efficient method to
-- search for a pattern inside a string, so most of the time, you'll want
-- to use the functions of this module, hence this is where the most work
-- has gone. Very short patterns are an exception to this, for those you
-- should consider using a finite automaton
-- ("Data.ByteString.Search.DFA.Array"). That is also often the better
-- choice for searching longer periodic patterns in a lazy ByteString
-- with many matches.
--
-- Operating on a strict target string is mostly faster than on a lazy
-- target string, but the difference is usually small (according to my
-- tests).
--
-- The known exceptions to this rule of thumb are
--
-- [long targets] Then the smaller memory footprint of a lazy target often
-- gives (much) better performance.
--
-- [high number of matches] When there are very many matches, strict target
-- strings are much faster, especially if the pattern is periodic.
--
-- If both conditions hold, either may outweigh the other.

-- complexity
--
-- Preprocessing the pattern is /O/(@patternLength@ + &#963;) in time and
-- space (&#963; is the alphabet size, 256 here) for all functions.
-- The time complexity of the searching phase for @matchXY@
-- is /O/(@targetLength@ \/ @patternLength@) in the best case.
-- For non-periodic patterns, the worst case complexity is
-- /O/(@targetLength@), but for periodic patterns, the worst case complexity
-- is /O/(@targetLength@ * @patternLength@) for the original Boyer-Moore
-- algorithm.
--
-- The searching functions in this module now contain a modification which
-- drastically improves the performance for periodic patterns.
-- I believe that for strict target strings, the worst case is now
-- /O/(@targetLength@) also for periodic patterns and for lazy target strings,
-- my semi-educated guess is
-- /O/(@targetLength@ * (1 + @patternLength@ \/ @chunkSize@)).
-- I may be very wrong, though.
--
-- The other functions don't have to deal with possible overlapping
-- patterns, hence the worst case complexity for the processing phase
-- is /O/(@targetLength@) (respectively /O/(@firstIndex + patternLength@)
-- for the breaking functions if the pattern occurs).

-- currying
--
-- These functions can all be usefully curried. Given only a pattern
-- the curried version will compute the supporting lookup tables only
-- once, allowing for efficient re-use.  Similarly, the curried
-- 'matchLL' and 'matchLS' will compute the concatenated pattern only
-- once.

-- overflow
--
-- The current code uses @Int@ to keep track of the locations in the
-- target string.  If the length of the pattern plus the length of any
-- strict chunk of the target string is greater than
-- @'maxBound' :: 'Int'@ then this will overflow causing an error.  We
-- try to detect this and call 'error' before a segfault occurs.

------------------------------------------------------------------------------
--                                 Wrappers                                 --
------------------------------------------------------------------------------

-- matching
--
-- These functions find the indices of all (possibly overlapping)
-- occurrences of a pattern in a target string.
-- If the pattern is empty, the result is @[0 .. length target]@.
-- If the pattern is much shorter than the target string
-- and the pattern does not occur very near the beginning of the target,
--
-- > not . null $ matchSS pattern target
--
-- is a much more efficient version of 'S.isInfixOf'.

-- | @matchLL@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is a simple wrapper for 'Data.ByteString.Lazy.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchLL #-}
matchLL :: L.ByteString     -- ^ Lazy pattern
        -> L.ByteString     -- ^ Lazy target string
        -> [Int64]          -- ^ Offsets of matches
matchLL pat = search . L.toChunks
  where
    search  = lazySearcher True (strictify pat)

-- | @matchLS@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is a simple wrapper for 'Data.ByteString.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchLS #-}
matchLS :: L.ByteString     -- ^ Lazy pattern
        -> S.ByteString     -- ^ Strict target string
        -> [Int]            -- ^ Offsets of matches
matchLS pat = search
  where
    search = strictSearcher True (strictify pat)

-- | @matchSL@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is an alias for 'Data.ByteString.Lazy.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchSL #-}
matchSL :: S.ByteString     -- ^ Strict pattern
        -> L.ByteString     -- ^ Lazy target string
        -> [Int64]          -- ^ Offsets of matches
matchSL pat = search . L.toChunks
  where
    search = lazySearcher True pat

-- | @matchSS@ finds the starting indices of all possibly overlapping
--   occurrences of the pattern in the target string.
--   It is an alias for 'Data.ByteString.Search.indices'.
--   If the pattern is empty, the result is @[0 .. 'length' target]@.
{-# INLINE matchSS #-}
matchSS :: S.ByteString     -- ^ Strict pattern
        -> S.ByteString     -- ^ Strict target string
        -> [Int]            -- ^ Offsets of matches
matchSS pat = search
  where
    search = strictSearcher True pat

-- | matchNOL finds the indices of all non-overlapping occurrences
--   of the pattern in the lazy target string.
{-# INLINE matchNOL #-}
matchNOL :: S.ByteString    -- ^ Strict pattern
         -> L.ByteString    -- ^ Lazy target string
         -> [Int64]         -- ^ Offsets of matches
matchNOL pat = search . L.toChunks
  where
    search = lazySearcher False pat

-- | matchNOS finds the indices of all non-overlapping occurrences
--   of the pattern in the Strict target string.
{-# INLINE matchNOS #-}
matchNOS :: S.ByteString    -- ^ Strict pattern
         -> S.ByteString    -- ^ Strict target string
         -> [Int]           -- ^ Offsets of matches
matchNOS pat = search
  where
    search = strictSearcher False pat

-- replacing
--
--   These functions replace all (non-overlapping) occurrences of a pattern
--   in the target string. If some occurrences overlap, the earliest is
--   replaced and replacing continues at the index after the replaced
--   occurrence, for example
--
-- > replaceAllL \"ana\" \"olog\" \"banana\" == \"bologna\",
-- > replaceAllS \"abacab\" \"u\" \"abacabacabacab\" == \"uacu\",
-- > replaceAllS \"aa\" \"aaa\" \"aaaa\" == \"aaaaaa\".
--
--   Equality of pattern and substitution is not checked, but
--
-- > pat == sub => 'strictify' (replaceAllS pat sub str) == str,
-- > pat == sub => replaceAllL pat sub str == str.
--
--   The result is a lazily generated lazy ByteString, the first chunks will
--   generally be available before the entire target has been scanned.
--   If the pattern is empty, but not the substitution, the result is
--   equivalent to @'cycle' sub@.

{-# INLINE replaceAllS #-}
replaceAllS :: Substitution rep
            => S.ByteString  -- ^ Pattern to replace
            -> rep           -- ^ Substitution string
            -> S.ByteString  -- ^ Target string
            -> L.ByteString  -- ^ Lazy result
replaceAllS pat
    | S.null pat = \sub -> prependCycle sub . flip LI.chunk LI.Empty
    | otherwise =
      let repl = strictRepl pat
      in \sub -> L.fromChunks . repl (substitution sub)

{-# INLINE replaceAllL #-}
replaceAllL :: Substitution rep
            => S.ByteString  -- ^ Pattern to replace
            -> rep           -- ^ Substitution string
            -> L.ByteString  -- ^ Target string
            -> L.ByteString  -- ^ Lazy result
replaceAllL pat
    | S.null pat = \sub -> prependCycle sub
    | S.length pat == 1 =
      let breaker = lazyBreak pat
          repl subst strs
              | null strs = []
              | otherwise =
                case breaker strs of
                  (pre, mtch) ->
                        pre ++ case mtch of
                                [] -> []
                                _  -> subst (repl subst (ldrop 1 mtch))
      in \sub -> let repl1 = repl (substitution sub)
                 in L.fromChunks . repl1 . L.toChunks
    | otherwise =
      let repl = lazyRepl pat
      in \sub -> let repl1 = repl (substitution sub)
                 in L.fromChunks . repl1 . L.toChunks

-- breaking
--
-- Break a string on a pattern. The first component of the result
-- contains the prefix of the string before the first occurrence of the
-- pattern, the second component contains the remainder.
-- The following relations hold:
--
-- > breakSubstringX \"\" str = (\"\", str)
-- > not (pat `isInfixOf` str) == null (snd $ breakSunbstringX pat str)
-- > True == case breakSubstringX pat str of
-- >          (x, y) -> not (pat `isInfixOf` x)
-- >                       && (null y || pat `isPrefixOf` y)

-- | This function has the same semantics as 'S.breakSubstring'
--   but is generally much faster.
{-# INLINE breakSubstringS #-}
breakSubstringS :: S.ByteString  -- ^ Pattern to break on
                -> S.ByteString  -- ^ String to break up
                -> (S.ByteString, S.ByteString)
                    -- ^ Prefix and remainder of broken string
breakSubstringS = strictBreak

breakAfterS :: S.ByteString
            -> S.ByteString
            -> (S.ByteString, S.ByteString)
breakAfterS pat
  | S.null pat = \str -> (S.empty, str)
breakAfterS pat = breaker
  where
    !patLen  = S.length pat
    searcher = strictSearcher False pat
    breaker str = case searcher str of
                    []    -> (str, S.empty)
                    (i:_) -> S.splitAt (i + patLen) str

-- | The analogous function for a lazy target string.
--   The first component is generated lazily, so parts of it can be
--   available before the pattern is detected (or found to be absent).
{-# INLINE breakSubstringL #-}
breakSubstringL :: S.ByteString  -- ^ Pattern to break on
                -> L.ByteString  -- ^ String to break up
                -> (L.ByteString, L.ByteString)
                    -- ^ Prefix and remainder of broken string
breakSubstringL pat = breaker . L.toChunks
  where
    lbrk = lazyBreak pat
    breaker strs = let (f, b) = lbrk strs
                   in (L.fromChunks f, L.fromChunks b)

breakAfterL :: S.ByteString
            -> L.ByteString
            -> (L.ByteString, L.ByteString)
breakAfterL pat
  | S.null pat      = \str -> (L.empty, str)
breakAfterL pat     = breaker' . L.toChunks
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    breaker' strs =
      let (pre, mtch) = breaker strs
          (pl, a) = if null mtch then ([],[]) else lsplit patLen mtch
      in (L.fromChunks (pre ++ pl), L.fromChunks a)

breakFindAfterL :: S.ByteString
                -> L.ByteString
                -> ((L.ByteString, L.ByteString), Bool)
breakFindAfterL pat
  | S.null pat  = \str -> ((L.empty, str), True)
breakFindAfterL pat = breaker' . L.toChunks
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    breaker' strs =
      let (pre, mtch) = breaker strs
          (pl, a) = if null mtch then ([],[]) else lsplit patLen mtch
      in ((L.fromChunks (pre ++ pl), L.fromChunks a), not (null mtch))

-- splitting
--
-- These functions implement various splitting strategies.
--
-- If the pattern to split on is empty, all functions return an
-- infinite list of empty ByteStrings.
-- Otherwise, the names are rather self-explanatory.
--
-- For nonempty patterns, the following relations hold:
--
-- > concat (splitKeepXY pat str) == str
-- > concat ('Data.List.intersperse' pat (splitDropX pat str)) == str.
--
-- All fragments except possibly the last in the result of
-- @splitKeepEndX pat@ end with @pat@, none of the fragments contains
-- more than one occurrence of @pat@ or is empty.
--
-- All fragments except possibly the first in the result of
-- @splitKeepFrontX pat@ begin with @pat@, none of the fragments
-- contains more than one occurrence of @patq or is empty.
--
-- > splitDropX pat str == map dropPat (splitKeepFrontX pat str)
-- >   where
-- >     patLen = length pat
-- >     dropPat frag
-- >        | pat `isPrefixOf` frag = drop patLen frag
-- >        | otherwise             = frag
--
-- but @splitDropX@ is a little more efficient than that.


{-# INLINE splitKeepEndS #-}
splitKeepEndS :: S.ByteString    -- ^ Pattern to split on
              -> S.ByteString    -- ^ String to split
              -> [S.ByteString]  -- ^ List of fragments
splitKeepEndS = strictSplitKeepEnd

{-# INLINE splitKeepFrontS #-}
splitKeepFrontS :: S.ByteString    -- ^ Pattern to split on
                -> S.ByteString    -- ^ String to split
                -> [S.ByteString]  -- ^ List of fragments
splitKeepFrontS = strictSplitKeepFront

{-# INLINE splitDropS #-}
splitDropS :: S.ByteString    -- ^ Pattern to split on
           -> S.ByteString    -- ^ String to split
           -> [S.ByteString]  -- ^ List of fragments
splitDropS = strictSplitDrop

{-# INLINE splitKeepEndL #-}
splitKeepEndL :: S.ByteString    -- ^ Pattern to split on
              -> L.ByteString    -- ^ String to split
              -> [L.ByteString]  -- ^ List of fragments
splitKeepEndL pat
    | S.null pat    = const (repeat L.empty)
    | otherwise     =
      let splitter = lazySplitKeepEnd pat
      in  map L.fromChunks . splitter . L.toChunks

{-# INLINE splitKeepFrontL #-}
splitKeepFrontL :: S.ByteString    -- ^ Pattern to split on
                -> L.ByteString    -- ^ String to split
                -> [L.ByteString]  -- ^ List of fragments
splitKeepFrontL pat
    | S.null pat    = const (repeat L.empty)
    | otherwise     =
      let splitter = lazySplitKeepFront pat
      in  map L.fromChunks . splitter . L.toChunks


{-# INLINE splitDropL #-}
splitDropL :: S.ByteString    -- ^ Pattern to split on
           -> L.ByteString    -- ^ String to split
           -> [L.ByteString]  -- ^ List of fragments
splitDropL pat
    | S.null pat    = const (repeat L.empty)
    | otherwise     =
      let splitter = lazySplitDrop pat
      in map L.fromChunks . splitter . L.toChunks

------------------------------------------------------------------------------
--                             Search Functions                             --
------------------------------------------------------------------------------

strictSearcher :: Bool -> S.ByteString -> S.ByteString -> [Int]
strictSearcher _ !pat
    | S.null pat = enumFromTo 0 . S.length
    | S.length pat == 1 = let !w = S.head pat in S.elemIndices w
strictSearcher !overlap pat = searcher
  where
    {-# INLINE patAt #-}
    patAt :: Int -> Word8
    patAt !i = unsafeIndex pat i

    !patLen = S.length pat
    !patEnd = patLen - 1
    !maxLen = maxBound - patLen
    !occT   = occurs pat        -- for bad-character-shift
    !suffT  = suffShifts pat    -- for good-suffix-shift
    !skip   = if overlap then unsafeAt suffT 0 else patLen
    -- shift after a complete match
    !kept   = patLen - skip     -- length of known prefix after full match
    !pe     = patAt patEnd      -- last pattern byte for fast comparison

    {-# INLINE occ #-}
    occ !w = unsafeAt occT (fromIntegral w)

    {-# INLINE suff #-}
    suff !i = unsafeAt suffT i

    searcher str
        | maxLen < strLen
            = error "Overflow in BoyerMoore.strictSearcher"
        | maxDiff < 0   = []
        | otherwise     = checkEnd patEnd
          where
            !strLen  = S.length str
            !strEnd  = strLen - 1
            !maxDiff = strLen - patLen

            {-# INLINE strAt #-}
            strAt !i = unsafeIndex str i

            -- After a full match, we know how long a prefix of the pattern
            -- still matches. Do not re-compare the prefix to prevent O(m*n)
            -- behaviour for periodic patterns.
            afterMatch !diff !patI =
              case strAt (diff + patI) of
                !c  | c == patAt patI ->
                      if patI == kept
                        then diff : let !diff' = diff + skip
                                    in if maxDiff < diff'
                                        then []
                                        else afterMatch diff' patEnd
                        else afterMatch diff (patI - 1)
                    | patI == patEnd  ->
                            checkEnd (diff + 2*patEnd + occ c)
                    | otherwise       ->
                            let {-# INLINE badShift #-}
                                badShift = patI + occ c
                                {-# INLINE goodShift #-}
                                goodShift = suff patI
                                !diff' = diff + max badShift goodShift
                            in if maxDiff < diff'
                                then []
                                else checkEnd (diff + patEnd)

            -- While comparing the last byte of the pattern, the bad-
            -- character-shift is always at least as large as the good-
            -- suffix-shift. Eliminating the unnecessary memory reads and
            -- comparison speeds things up noticeably.
            checkEnd !sI  -- index in string to compare to last of pattern
                | strEnd < sI   = []
                | otherwise     =
                  case strAt sI of
                    !c  | c == pe   -> findMatch (sI - patEnd) (patEnd - 1)
                        | otherwise -> checkEnd (sI + patEnd + occ c)

            -- Once the last byte has matched, we enter the full matcher
            -- diff is the offset of the window, patI the index of the
            -- pattern byte to compare next.
            findMatch !diff !patI =
                case strAt (diff + patI) of
                    !c  | c == patAt patI ->
                            if patI == 0    -- full match, report
                                then diff : let !diff' = diff + skip
                                            in if maxDiff < diff'
                                                then []
                                                else
                                                  if skip == patLen
                                                    then
                                                      checkEnd (diff' + patEnd)
                                                    else
                                                      afterMatch diff' patEnd
                                else findMatch diff (patI - 1)
                        | otherwise       ->
                            let !diff' = diff + max (patI + occ c) (suff patI)
                            in if maxDiff < diff'
                                then []
                                else checkEnd (diff' + patEnd)


lazySearcher :: Bool -> S.ByteString -> [S.ByteString] -> [Int64]
lazySearcher _ !pat
    | S.null pat        =
      let zgo !prior [] = [prior]
          zgo prior (!str : rest) =
              let !l = S.length str
                  !prior' = prior + fromIntegral l
              in [prior + fromIntegral i | i <- [0 .. l-1]] ++ zgo prior' rest
      in zgo 0
    | S.length pat == 1 =
      let !w = S.head pat
          ixes = S.elemIndices w
          go _ [] = []
          go !prior (!str : rest)
            = let !prior' = prior + fromIntegral (S.length str)
              in map ((+ prior) . fromIntegral) (ixes str) ++ go prior' rest
      in go 0
lazySearcher !overlap pat = searcher
  where
    {-# INLINE patAt #-}
    patAt :: Int -> Word8
    patAt !i = unsafeIndex pat i

    !patLen = S.length pat
    !patEnd = patLen - 1
    {-# INLINE preEnd #-}
    preEnd  = patEnd - 1
    !maxLen = maxBound - patLen
    !occT   = occurs pat        -- for bad-character-shift
    !suffT  = suffShifts pat    -- for good-suffix-shift
    !skip   = if overlap then unsafeAt suffT 0 else patLen
    -- shift after a complete match
    !kept   = patLen - skip     -- length of known prefix after full match
    !pe     = patAt patEnd      -- last pattern byte for fast comparison

    {-# INLINE occ #-}
    occ !w = unsafeAt occT (fromIntegral w)

    {-# INLINE suff #-}
    suff !i = unsafeAt suffT i

    searcher lst = case lst of
                    []      -> []
                    (h : t) ->
                      if maxLen < S.length h
                        then error "Overflow in BoyerMoore.lazySearcher"
                        else seek 0 [] h t 0 patEnd

    -- seek is used to position the "zipper" of (past, str, future) to the
    -- correct S.ByteString to search. This is done by ensuring that
    -- 0 <= strPos < strLen, where strPos = diffPos + patPos.
    -- Note that future is not a strict parameter. The bytes being compared
    -- will then be (strAt strPos) and (patAt patPos).
    -- Splitting this into specialised versions is possible, but it would
    -- only be useful if the pattern length is close to (or larger than)
    -- the chunk size. For ordinary patterns of at most a few hundred bytes,
    -- the overhead of yet more code-paths and larger code size will probably
    -- outweigh the small gains in the relatively rare calls to seek.
    seek :: Int64 -> [S.ByteString] -> S.ByteString
            -> [S.ByteString] -> Int -> Int -> [Int64]
    seek !prior !past !str future !diffPos !patPos
        | strPos < 0 =  -- need to look at previous chunk
            case past of
                (h : t) ->
                    let !hLen = S.length h
                    in seek (prior - fromIntegral hLen) t h (str : future)
                                (diffPos + hLen) patPos
                []      -> error "seek back too far!"
        | strEnd < strPos =  -- need to look at next chunk if there is
            case future of
                (h : t) ->
                    let {-# INLINE prior' #-}
                        prior' = prior + fromIntegral strLen
                        !diffPos' = diffPos - strLen
                        {-# INLINE past' #-}
                        past' = release (-diffPos') (str : past)
                    in if maxLen < S.length h
                        then error "Overflow in BoyerMoore.lazySearcher"
                        else seek prior' past' h t diffPos' patPos
                []      -> []
        | patPos == patEnd  = checkEnd strPos
        | diffPos < 0       = matcherN diffPos patPos
        | otherwise         = matcherP diffPos patPos
          where
            !strPos  = diffPos + patPos
            !strLen  = S.length str
            !strEnd  = strLen - 1
            !maxDiff = strLen - patLen

            {-# INLINE strAt #-}
            strAt !i = unsafeIndex str i

            -- While comparing the last byte of the pattern, the bad-
            -- character-shift is always at least as large as the good-
            -- suffix-shift. Eliminating the unnecessary memory reads and
            -- comparison speeds things up noticeably.
            checkEnd !sI  -- index in string to compare to last of pattern
              | strEnd < sI = seek prior past str future (sI - patEnd) patEnd
              | otherwise   =
                case strAt sI of
                  !c | c == pe   ->
                       if sI < patEnd
                        then case sI of
                              0 -> seek prior past str future (-patEnd) preEnd
                              _ -> matcherN (sI - patEnd) preEnd
                        else matcherP (sI - patEnd) preEnd
                     | otherwise -> checkEnd (sI + patEnd + occ c)

            -- Once the last byte has matched, we enter the full matcher
            -- diff is the offset of the window, patI the index of the
            -- pattern byte to compare next.

            -- matcherN is the tight loop that walks backwards from the end
            -- of the pattern checking for matching bytes. The offset is
            -- always negative, so no complete match can occur here.
            -- When a byte matches, we need to check whether we've reached
            -- the front of this chunk, otherwise whether we need the next.
            matcherN !diff !patI =
              case strAt (diff + patI) of
                !c  | c == patAt patI   ->
                        if diff + patI == 0
                            then seek prior past str future diff (patI - 1)
                            else matcherN diff (patI - 1)
                    | otherwise         ->
                        let {-# INLINE badShift #-}
                            badShift = patI + occ c
                            {-# INLINE goodShift #-}
                            goodShift = suff patI
                            !diff' = diff + max badShift goodShift
                        in if maxDiff < diff'
                            then seek prior past str future diff' patEnd
                            else checkEnd (diff' + patEnd)

            -- matcherP is the tight loop for non-negative offsets.
            -- When the pattern is shifted, we must check whether we leave
            -- the current chunk, otherwise we only need to check for a
            -- complete match.
            matcherP !diff !patI =
              case strAt (diff + patI) of
                !c  | c == patAt patI   ->
                      if patI == 0
                        then prior + fromIntegral diff :
                              let !diff' = diff + skip
                              in if maxDiff < diff'
                                then seek prior past str future diff' patEnd
                                else
                                  if skip == patLen
                                    then
                                      checkEnd (diff' + patEnd)
                                    else
                                      afterMatch diff' patEnd
                        else matcherP diff (patI - 1)
                    | otherwise         ->
                        let {-# INLINE badShift #-}
                            badShift = patI + occ c
                            {-# INLINE goodShift #-}
                            goodShift = suff patI
                            !diff' = diff + max badShift goodShift
                        in if maxDiff < diff'
                            then seek prior past str future diff' patEnd
                            else checkEnd (diff' + patEnd)

            -- After a full match, we know how long a prefix of the pattern
            -- still matches. Do not re-compare the prefix to prevent O(m*n)
            -- behaviour for periodic patterns.
            -- This breaks down at chunk boundaries, but except for long
            -- patterns with a short period, that shouldn't matter much.
            afterMatch !diff !patI =
              case strAt (diff + patI) of
                !c  | c == patAt patI ->
                      if patI == kept
                        then prior + fromIntegral diff :
                            let !diff' = diff + skip
                            in if maxDiff < diff'
                                then seek prior past str future diff' patEnd
                                else afterMatch diff' patEnd
                        else afterMatch diff (patI - 1)
                    | patI == patEnd  ->
                        checkEnd (diff + (2*patEnd) + occ c)
                    | otherwise       ->
                        let {-# INLINE badShift #-}
                            badShift = patI + occ c
                            {-# INLINE goodShift #-}
                            goodShift = suff patI
                            !diff' = diff + max badShift goodShift
                        in if maxDiff < diff'
                            then seek prior past str future diff' patEnd
                            else checkEnd (diff' + patEnd)

------------------------------------------------------------------------------
--                            Breaking Functions                            --
------------------------------------------------------------------------------

strictBreak :: S.ByteString -> S.ByteString -> (S.ByteString, S.ByteString)
strictBreak pat
    | S.null pat    = \str -> (S.empty, str)
    | otherwise     = breaker
      where
        searcher = strictSearcher False pat
        breaker str = case searcher str of
                        []      -> (str, S.empty)
                        (i:_)   -> S.splitAt i str


-- Ugh! Code duplication ahead!
-- But we want to get the first component lazily, so it's no good to find
-- the first index (if any) and then split.
-- Therefore bite the bullet and copy most of the code of lazySearcher.
-- No need for afterMatch here, fortunately.
lazyBreak ::S.ByteString -> [S.ByteString] -> ([S.ByteString], [S.ByteString])
lazyBreak !pat
  | S.null pat  = \lst -> ([],lst)
  | S.length pat == 1 =
    let !w = S.head pat
        go [] = ([], [])
        go (!str : rest) =
            case S.elemIndices w str of
                []    -> let (pre, post) = go rest in (str : pre, post)
                (i:_) -> if i == 0
                            then ([], str : rest)
                            else ([S.take i str], S.drop i str : rest)
    in go
lazyBreak pat = breaker
  where
    !patLen = S.length pat
    !patEnd = patLen - 1
    !occT   = occurs pat
    !suffT  = suffShifts pat
    !maxLen = maxBound - patLen
    !pe     = patAt patEnd

    {-# INLINE patAt #-}
    patAt !i = unsafeIndex pat i

    {-# INLINE occ #-}
    occ !w = unsafeAt occT (fromIntegral w)

    {-# INLINE suff #-}
    suff !i = unsafeAt suffT i

    breaker lst =
      case lst of
        []    -> ([],[])
        (h:t) ->
          if maxLen < S.length h
            then error "Overflow in BoyerMoore.lazyBreak"
            else seek [] h t 0 patEnd

    seek :: [S.ByteString] -> S.ByteString -> [S.ByteString]
                -> Int -> Int -> ([S.ByteString], [S.ByteString])
    seek !past !str future !offset !patPos
      | strPos < 0 =
        case past of
          [] -> error "not enough past!"
          (h : t) -> seek t h (str : future) (offset + S.length h) patPos
      | strEnd < strPos =
        case future of
          []      -> (foldr (flip (.) . (:)) id past [str], [])
          (h : t) ->
            let !off' = offset - strLen
                (past', !discharge) = keep (-off') (str : past)
            in if maxLen < S.length h
                then error "Overflow in BoyerMoore.lazyBreak (future)"
                else let (pre,post) = seek past' h t off' patPos
                     in (foldr (flip (.) . (:)) id discharge pre, post)
      | patPos == patEnd = checkEnd strPos
      | offset < 0 = matcherN offset patPos
      | otherwise  = matcherP offset patPos
      where
        {-# INLINE strAt #-}
        strAt !i = unsafeIndex str i

        !strLen = S.length str
        !strEnd = strLen - 1
        !maxOff = strLen - patLen
        !strPos = offset + patPos

        checkEnd !sI
          | strEnd < sI = seek past str future (sI - patEnd) patEnd
          | otherwise   =
            case strAt sI of
              !c  | c == pe   ->
                    if sI < patEnd
                      then (if sI == 0
                              then seek past str future (-patEnd) (patEnd - 1)
                              else matcherN (sI - patEnd) (patEnd - 1))
                      else matcherP (sI - patEnd) (patEnd - 1)
                  | otherwise -> checkEnd (sI + patEnd + occ c)

        matcherN !off !patI =
          case strAt (off + patI) of
            !c  | c == patAt patI ->
                  if off + patI == 0
                    then seek past str future off (patI - 1)
                    else matcherN off (patI - 1)
                | otherwise ->
                    let !off' = off + max (suff patI) (patI + occ c)
                    in if maxOff < off'
                        then seek past str future off' patEnd
                        else checkEnd (off' + patEnd)

        matcherP !off !patI =
          case strAt (off + patI) of
            !c  | c == patAt patI ->
                  if patI == 0
                    then let !pre = if off == 0 then [] else [S.take off str]
                             !post = S.drop off str
                         in (foldr (flip (.) . (:)) id past pre, post:future)
                    else matcherP off (patI - 1)
                | otherwise ->
                    let !off' = off + max (suff patI) (patI + occ c)
                    in if maxOff < off'
                        then seek past str future off' patEnd
                        else checkEnd (off' + patEnd)


------------------------------------------------------------------------------
--                            Splitting Functions                           --
------------------------------------------------------------------------------

strictSplitKeepFront :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepFront pat
  | S.null pat  = const (repeat S.empty)
strictSplitKeepFront pat = splitter
  where
    !patLen = S.length pat
    searcher = strictSearcher False pat
    splitter str
        | S.null str    = []
        | otherwise     =
          case searcher str of
            []            -> [str]
            (i:_)
              | i == 0    -> psplitter str
              | otherwise -> S.take i str : psplitter (S.drop i str)
    psplitter !str
        | S.null str    = []
        | otherwise     =
          case searcher (S.drop patLen str) of
            []      -> [str]
            (i:_)   -> S.take (i + patLen) str :
                        psplitter (S.drop (i + patLen) str)

strictSplitKeepEnd :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitKeepEnd pat
  | S.null pat  = const (repeat S.empty)
strictSplitKeepEnd pat = splitter
  where
    !patLen = S.length pat
    searcher = strictSearcher False pat
    splitter str
        | S.null str    = []
        | otherwise     =
          case searcher str of
            [] -> [str]
            (i:_) -> S.take (i + patLen) str :
                        splitter (S.drop (i + patLen) str)

strictSplitDrop :: S.ByteString -> S.ByteString -> [S.ByteString]
strictSplitDrop pat
    | S.null pat    = const (repeat S.empty)
strictSplitDrop pat = splitter'
  where
    !patLen = S.length pat
    searcher = strictSearcher False pat
    splitter' str
        | S.null str    = []
        | otherwise     = splitter str
    splitter str
        | S.null str    = [S.empty]
        | otherwise     =
          case searcher str of
            []            -> [str]
            (i:_) -> S.take i str : splitter (S.drop (i + patLen) str)

-- non-empty pattern
lazySplitKeepFront :: S.ByteString -> [S.ByteString] -> [[S.ByteString]]
lazySplitKeepFront pat = splitter'
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    splitter' strs = case splitter strs of
                        ([]:rest) -> rest
                        other -> other
    splitter [] = []
    splitter strs =
      case breaker strs of
        (pre, mtch) ->
           pre : case mtch of
                    [] -> []
                    _  -> case lsplit patLen mtch of
                            (pt, rst) ->
                              if null rst
                                then [pt]
                                else let (h : t) = splitter rst
                                     in (pt ++ h) : t

-- non-empty pattern
lazySplitKeepEnd :: S.ByteString -> [S.ByteString] -> [[S.ByteString]]
lazySplitKeepEnd pat = splitter
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    splitter [] = []
    splitter strs =
      case breaker strs of
        (pre, mtch) ->
            let (h : t) = if null mtch
                            then [[]]
                            else case lsplit patLen mtch of
                                    (pt, rst) -> pt : splitter rst
            in (pre ++ h) : t

lazySplitDrop :: S.ByteString -> [S.ByteString] -> [[S.ByteString]]
lazySplitDrop pat = splitter
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    splitter [] = []
    splitter strs = splitter' strs
    splitter' [] = [[]]
    splitter' strs = case breaker strs of
                        (pre,mtch) ->
                            pre : case mtch of
                                    [] -> []
                                    _  -> splitter' (ldrop patLen mtch)

------------------------------------------------------------------------------
--                            Replacing Functions                           --
------------------------------------------------------------------------------

-- replacing loop for strict ByteStrings, called only for
-- non-empty patterns and substitutions
strictRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
            -> S.ByteString -> [S.ByteString]
strictRepl pat  = repl
  where
    !patLen = S.length pat
    searcher = strictSearcher False pat
    repl sub = replacer
      where
        replacer str
          | S.null str    = []
          | otherwise     =
            case searcher str of
              []            -> [str]
              (i:_)
                | i == 0    -> sub $ replacer (S.drop patLen str)
                | otherwise ->
                  S.take i str : sub (replacer (S.drop (i + patLen) str))

{-

These would be really nice.
Unfortunately they're too slow, so instead, there's another instance of
almost the same code as in lazySearcher below.

-- variant of below
lazyFRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
                -> [S.ByteString] -> [S.ByteString]
lazyFRepl pat = repl
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    repl sub = replacer
      where
        replacer [] = []
        replacer strs =
          let (pre, mtch) = breaker strs
          in pre ++ case mtch of
                      [] -> []
                      _  -> sub (replacer (ldrop patLen mtch))

-- This is nice and short. I really hope it's performing well!
lazyBRepl :: S.ByteString -> S.ByteString -> [S.ByteString] -> [S.ByteString]
lazyBRepl pat !sub = replacer
  where
    !patLen = S.length pat
    breaker = lazyBreak pat
    replacer [] = []
    replacer strs = let (pre, mtch) = breaker strs
                    in pre ++ case mtch of
                                [] -> []
                                _  -> sub : replacer (ldrop patLen mtch)
-}

-- Yet more code duplication.
--
-- Benchmark it against an implementation using lazyBreak and,
-- unless it's significantly faster, NUKE IT!!
--
-- Sigh, it is significantly faster. 10 - 25 %.
-- I could live with the 10, but 25 is too much.
--
-- Hmm, maybe an implementation via
-- replace pat sub = L.intercalate sub . split pat
-- would be competitive now.
-- TODO: test speed and space usage.
--
-- replacing loop for lazy ByteStrings as list of chunks,
-- called only for non-empty patterns
lazyRepl :: S.ByteString -> ([S.ByteString] -> [S.ByteString])
            -> [S.ByteString] -> [S.ByteString]
lazyRepl pat = replacer
 where
  !patLen = S.length pat
  !patEnd = patLen - 1
  !occT   = occurs pat
  !suffT  = suffShifts pat
  !maxLen = maxBound - patLen
  !pe     = patAt patEnd

  {-# INLINE patAt #-}
  patAt !i = unsafeIndex pat i

  {-# INLINE occ #-}
  occ !w = unsafeAt occT (fromIntegral w)

  {-# INLINE suff #-}
  suff !i = unsafeAt suffT i

  replacer sub lst =
      case lst of
        []    -> []
        (h:t) ->
          if maxLen < S.length h
            then error "Overflow in BoyerMoore.lazyRepl"
            else seek [] h t 0 patEnd
   where
        chop _ [] = []
        chop !k (!str : rest)
          | k < s     =
            if maxLen < (s - k)
                then error "Overflow in BoyerMoore.lazyRepl (chop)"
                else seek [] (S.drop k str) rest 0 patEnd
          | otherwise = chop (k-s) rest
            where
              !s = S.length str

        seek :: [S.ByteString] -> S.ByteString -> [S.ByteString]
                                    -> Int -> Int -> [S.ByteString]
        seek !past !str fut !offset !patPos
          | strPos < 0 =
            case past of
              [] -> error "not enough past!"
              (h : t) -> seek t h (str : fut) (offset + S.length h) patPos
          | strEnd < strPos =
            case fut of
              []      -> foldr (flip (.) . (:)) id past [str]
              (h : t) ->
                let !off' = offset - strLen
                    (past', !discharge) = keep (-off') (str : past)
                in if maxLen < S.length h
                    then error "Overflow in BoyerMoore.lazyRepl (future)"
                    else foldr (flip (.) . (:)) id discharge $
                                            seek past' h t off' patPos
          | patPos == patEnd = checkEnd strPos
          | offset < 0 = matcherN offset patPos
          | otherwise  = matcherP offset patPos
            where
              {-# INLINE strAt #-}
              strAt !i = unsafeIndex str i

              !strLen = S.length str
              !strEnd = strLen - 1
              !maxOff = strLen - patLen
              !strPos = offset + patPos

              checkEnd !sI
                | strEnd < sI = seek past str fut (sI - patEnd) patEnd
                | otherwise   =
                  case strAt sI of
                    !c  | c == pe   ->
                          if sI < patEnd
                            then (if sI == 0
                              then seek past str fut (-patEnd) (patEnd - 1)
                              else matcherN (sI - patEnd) (patEnd - 1))
                          else matcherP (sI - patEnd) (patEnd - 1)
                        | otherwise -> checkEnd (sI + patEnd + occ c)

              matcherN !off !patI =
                case strAt (off + patI) of
                  !c  | c == patAt patI ->
                        if off + patI == 0
                          then seek past str fut off (patI - 1)
                          else matcherN off (patI - 1)
                      | otherwise ->
                        let !off' = off + max (suff patI) (patI + occ c)
                        in if maxOff < off'
                            then seek past str fut off' patEnd
                            else checkEnd (off' + patEnd)

              matcherP !off !patI =
                case strAt (off + patI) of
                  !c  | c == patAt patI ->
                        if patI == 0
                          then foldr (flip (.) . (:)) id past $
                            let pre = if off == 0
                                        then id
                                        else (S.take off str :)
                            in pre . sub $
                                let !p = off + patLen
                                in if p < strLen
                                    then seek [] (S.drop p str) fut 0 patEnd
                                    else chop (p - strLen) fut
                        else matcherP off (patI - 1)
                      | otherwise ->
                        let !off' = off + max (suff patI) (patI + occ c)
                        in if maxOff < off'
                            then seek past str fut off' patEnd
                            else checkEnd (off' + patEnd)

------------------------------------------------------------------------------
--                              Preprocessing                               --
------------------------------------------------------------------------------

{- Table of last occurrences of bytes in the pattern.

For each byte we record the (negated) position of its last
occurrence in the pattern except at the last position.

Thus, if byte b gives a mismatch at pattern position patPos,
we know that we can shift the window right by at least

patPos - (last occurrence of b in init pat)

or, since we negated the positions,

patPos + (occurs pat)

If the byte doesn't occur in the pattern, we can shift the window
so that the start of the pattern is aligned with the byte after this,
hence the default value of 1.

Complexity: O(patLen + size of alphabet)

-}
{- Precondition: non-empty pattern

This invariant is guaranteed by not exporting occurs,
inside this module, we don't call it for empty patterns.

-}
{-# INLINE occurs #-}
occurs :: S.ByteString -> UArray Int Int
occurs pat = runSTUArray (do
    let !patEnd = S.length pat - 1
        {-# INLINE patAt #-}
        patAt :: Int -> Int
        patAt i = fromIntegral (unsafeIndex pat i)
    ar <- newArray (0, 255) 1
    let loop !i
            | i == patEnd   = return ar
            | otherwise     = do
                unsafeWrite ar (patAt i) (-i)
                loop (i + 1)
    loop 0)

{- Table of suffix-shifts.

When a mismatch occurs at pattern position patPos, assumed to be not the
last position in the pattern, the suffix u of length (patEnd - patPos)
has been successfully matched.
Let c be the byte in the pattern at position patPos.

If the sub-pattern u also occurs in the pattern somewhere *not* preceded
by c, let uPos be the position of the last byte in u for the last of
all such occurrences. Then there can be no match if the window is shifted
less than (patEnd - uPos) places, because either the part of the string
which matched the suffix u is not aligned with an occurrence of u in the
pattern, or it is aligned with an occurrence of u which is preceded by
the same byte c as the originally matched suffix.

If the complete sub-pattern u does not occur again in the pattern, or all
of its occurrences are preceded by the byte c, then we can align the
pattern with the string so that a suffix v of u matches a prefix of the
pattern. If v is chosen maximal, no smaller shift can give a match, so
we can shift by at least (patLen - length v).

If a complete match is encountered, we can shift by at least the same
amount as if the first byte of the pattern was a mismatch, no complete
match is possible between these positions.

For non-periodic patterns, only very short suffixes will usually occur
again in the pattern, so if a longer suffix has been matched before a
mismatch, the window can then be shifted entirely past the partial
match, so that part of the string will not be re-compared.
For periodic patterns, the suffix shifts will be shorter in general,
leading to an O(strLen * patLen) worst-case performance.

To compute the suffix-shifts, we use an array containing the lengths of
the longest common suffixes of the entire pattern and its prefix ending
with position pos.

-}
{- Precondition: non-empty pattern -}
{-# INLINE suffShifts #-}
suffShifts :: S.ByteString -> UArray Int Int
suffShifts pat = runSTUArray (do
    let !patLen = S.length pat
        !patEnd = patLen - 1
        !suff   = suffLengths pat
    ar <- newArray (0,patEnd) patLen
    let preShift !idx !j
            | idx < 0   = return ()
            | suff `unsafeAt` idx == idx + 1 = do
                let !shf = patEnd - idx
                    fillToShf !i
                        | i == shf  = return ()
                        | otherwise = do
                            unsafeWrite ar i shf
                            fillToShf (i + 1)
                fillToShf j
                preShift (idx - 1) shf
            | otherwise = preShift (idx - 1) j
        sufShift !idx
            | idx == patEnd = return ar
            | otherwise     = do
                unsafeWrite ar (patEnd - unsafeAt suff idx) (patEnd - idx)
                sufShift (idx + 1)
    preShift (patEnd - 1) 0
    sufShift 0)

{- Table of suffix-lengths.

The value of this array at place i is the length of the longest common
suffix of the entire pattern and the prefix of the pattern ending at
position i.

Usually, most of the entries will be 0. Only if the byte at position i
is the same as the last byte of the pattern can the value be positive.
In any case the value at index patEnd is patLen (since the pattern is
identical to itself) and 0 <= value at i <= (i + 1).

To keep this part of preprocessing linear in the length of the pattern,
the implementation must be non-obvious (the obvious algorithm for this
is quadratic).

When the index under consideration is inside a previously identified
common suffix, we align that suffix with the end of the pattern and
check whether the suffix ending at the position corresponding to idx
is shorter than the part of the suffix up to idx. If that is the case,
the length of the suffix ending at idx is that of the suffix at the
corresponding position. Otherwise extend the suffix as far as possible.
If the index under consideration is not inside a previously identified
common suffix, compare with the last byte of the pattern. If that gives
a suffix of length > 1, for the next index we're in the previous
situation, otherwise we're back in the same situation for the next
index.

-}
{- Precondition: non-empty pattern -}
{-# INLINE suffLengths #-}
suffLengths :: S.ByteString -> UArray Int Int
suffLengths pat = runSTUArray (do
    let !patLen = S.length pat
        !patEnd = patLen - 1
        !preEnd = patEnd - 1
        {-# INLINE patAt #-}
        patAt i = unsafeIndex pat i
        -- last byte for comparisons
        !pe     = patAt patEnd
        -- find index preceding the longest suffix
        dec !diff !j
            | j < 0 || patAt j /= patAt (j + diff) = j
            | otherwise = dec diff (j - 1)
    ar <- newArray_ (0, patEnd)
    unsafeWrite ar patEnd patLen
    let noSuff !i
            | i < 0     = return ar
            | patAt i == pe = do
                let !diff  = patEnd - i
                    !nextI = i - 1
                    !prevI = dec diff nextI
                if prevI == nextI
                    then unsafeWrite ar i 1 >> noSuff nextI
                    else do unsafeWrite ar i (i - prevI)
                            suffLoop prevI preEnd nextI
            | otherwise = do
                unsafeWrite ar i 0
                noSuff (i - 1)
        suffLoop !pre !end !idx
            | idx < 0   = return ar
            | pre < idx =
              if patAt idx /= pe
                then unsafeWrite ar idx 0 >> suffLoop pre (end - 1) (idx - 1)
                else do
                    prevS <- unsafeRead ar end
                    if pre + prevS < idx
                        then do unsafeWrite ar idx prevS
                                suffLoop pre (end - 1) (idx - 1)
                        else do let !prI = dec (patEnd - idx) pre
                                unsafeWrite ar idx (idx - prI)
                                suffLoop prI preEnd (idx - 1)
            | otherwise = noSuff idx
    noSuff preEnd)