{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}

module Pipes.ByteString.Substring
  ( consumeBreakSubstring
  , consumeBreakSubstringLeftovers
  , consumeDropExactLeftovers
  , consumeDropWhileLeftovers
  ) where

import Data.ByteString (ByteString)
import Data.ByteString.Builder (Builder)
import Data.ByteString.Substring
import Data.Int
import Data.Maybe
import Pipes
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as LB
import qualified Data.List as L

consumeBreakSubstring :: Monad m => ByteString -> Consumer' ByteString m (Builder,ByteString)
consumeBreakSubstring = consumeBreakSubstringLeftovers B.empty

consumeBreakSubstringLeftovers :: Monad m => ByteString -> ByteString -> Consumer' ByteString m (Builder,ByteString)
consumeBreakSubstringLeftovers leftovers0 pat = do
  (b0, leftovers) <- takeStrictLeftovers leftovers0 (B.length pat)
  let !s = prepareBreakSubstring pat b0
  go s mempty (LB.fromStrict b0) leftovers
  where
  go :: Monad m
     => KarpRabinState
     -> Builder
     -> LB.ByteString -- Buffer for chunks we cannot yet append to the builder
     -> ByteString
     -> Consumer' ByteString m (Builder,ByteString)
  go s1 bb heldChunk chunk = case breakSubstringResume s1 chunk of
    KarpRabinResultDone ix _ -> return $ if ix < 0
      then let (a,b) = LB.splitAt (LB.length heldChunk + intToInt64 ix) heldChunk in
        (bb <> BB.lazyByteString a, LB.toStrict b <> chunk)
      else let (a,b) = B.splitAt ix chunk in
        (bb <> BB.lazyByteString heldChunk <> BB.byteString a, b)
    KarpRabinResultMore s2 -> do
      let appendedHeldChunk = heldChunk <> LB.fromStrict chunk
          (confirmedChunk,nextHeldChunk) = LB.splitAt (LB.length appendedHeldChunk - intToInt64 (B.length pat)) appendedHeldChunk
      nextChunk <- await
      go s2 (bb <> BB.lazyByteString confirmedChunk) nextHeldChunk nextChunk

-- | If we get back a Left, then the chunks did not match what we expected.
--   The tuple contains the number of characters that did match and the
--   beginning of the failure to match.
--   If we get back a Right, it has the leftovers from the chunk that
--   completed the match.
consumeDropExactLeftovers :: Monad m => ByteString -> ByteString -> Consumer' ByteString m (Either (Int,ByteString) ByteString)
consumeDropExactLeftovers leftovers0 preface = go 0 leftovers0
  where
  go :: Monad m => Int -> ByteString -> Consumer' ByteString m (Either (Int,ByteString) ByteString)
  go ix chunk = if lenRemainingPreface > lenChunk
    then do
      let (p1,_) = B.splitAt lenChunk remainingPreface
      if p1 == chunk
        then await >>= go (ix + lenChunk)
        else do
          let ixDifferentByte = findDifferentByte p1 chunk
          return (Left (ixDifferentByte + ix,B.drop ixDifferentByte chunk))
    else do
      let (c1,c2) = B.splitAt lenRemainingPreface chunk
      if c1 == remainingPreface
        then return (Right c2)
        else do
          let ixDifferentByte = findDifferentByte c1 remainingPreface
          return (Left (ixDifferentByte + ix,B.drop ixDifferentByte chunk))
    where
    remainingPreface = B.drop ix preface
    lenRemainingPreface = B.length remainingPreface
    lenChunk = B.length chunk

consumeDropWhileLeftovers :: Monad m => ByteString -> (Char -> Bool) -> Consumer' ByteString m ByteString
consumeDropWhileLeftovers leftovers0 predicate = go leftovers0
  where
  go :: Monad m => ByteString -> Consumer' ByteString m ByteString
  go chunk = do
    let remaining = BC.dropWhile predicate chunk
    if B.null remaining
      then await >>= go
      else return remaining

-- | This is extremely inefficient. Returns -10000 if all bytes match.
findDifferentByte :: ByteString -> ByteString -> Int
findDifferentByte a b = fromMaybe (-10000) (L.elemIndex False (B.zipWith (==) a b))

-- | In the returned tuple, the first element is the bytestring prior to
--   the index. The second item is the leftover bytes in the chunk.
takeStrictLeftovers :: Monad m => ByteString -> Int -> Consumer' ByteString m (ByteString,ByteString)
takeStrictLeftovers leftovers0 total = if total < 1
  then return (B.empty,leftovers0)
  else go 0 mempty leftovers0
  where
  go :: Monad m => Int -> Builder -> ByteString -> Consumer' ByteString m (ByteString,ByteString)
  go i1 bb bs = do
    let i2 = i1 + B.length bs
    if i2 < total
      then await >>= go i2 (bb <> BB.byteString bs)
      else do
        let (a,b) = B.splitAt (B.length bs - (i2 - total)) bs
        return (LB.toStrict $ BB.toLazyByteString $ bb <> BB.byteString a, b)

intToInt64 :: Int -> Int64
intToInt64 = fromIntegral