{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Pipes.ByteString.Substring
( consumeBreakSubstring
, consumeBreakSubstringLeftovers
, consumeDropExactLeftovers
, consumeDropWhileLeftovers
) where
import Data.ByteString (ByteString)
import Data.ByteString.Builder (Builder)
import Data.ByteString.Substring
import Data.Int
import Data.Maybe
import Pipes
import qualified Data.ByteString as B
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Char8 as BC
import qualified Data.ByteString.Lazy as LB
import qualified Data.List as L
consumeBreakSubstring :: Monad m => ByteString -> Consumer' ByteString m (Builder,ByteString)
consumeBreakSubstring = consumeBreakSubstringLeftovers B.empty
consumeBreakSubstringLeftovers :: Monad m => ByteString -> ByteString -> Consumer' ByteString m (Builder,ByteString)
consumeBreakSubstringLeftovers leftovers0 pat = do
(b0, leftovers) <- takeStrictLeftovers leftovers0 (B.length pat)
let !s = prepareBreakSubstring pat b0
go s mempty (LB.fromStrict b0) leftovers
where
go :: Monad m
=> KarpRabinState
-> Builder
-> LB.ByteString
-> ByteString
-> Consumer' ByteString m (Builder,ByteString)
go s1 bb heldChunk chunk = case breakSubstringResume s1 chunk of
KarpRabinResultDone ix _ -> return $ if ix < 0
then let (a,b) = LB.splitAt (LB.length heldChunk + intToInt64 ix) heldChunk in
(bb <> BB.lazyByteString a, LB.toStrict b <> chunk)
else let (a,b) = B.splitAt ix chunk in
(bb <> BB.lazyByteString heldChunk <> BB.byteString a, b)
KarpRabinResultMore s2 -> do
let appendedHeldChunk = heldChunk <> LB.fromStrict chunk
(confirmedChunk,nextHeldChunk) = LB.splitAt (LB.length appendedHeldChunk - intToInt64 (B.length pat)) appendedHeldChunk
nextChunk <- await
go s2 (bb <> BB.lazyByteString confirmedChunk) nextHeldChunk nextChunk
consumeDropExactLeftovers :: Monad m => ByteString -> ByteString -> Consumer' ByteString m (Either (Int,ByteString) ByteString)
consumeDropExactLeftovers leftovers0 preface = go 0 leftovers0
where
go :: Monad m => Int -> ByteString -> Consumer' ByteString m (Either (Int,ByteString) ByteString)
go ix chunk = if lenRemainingPreface > lenChunk
then do
let (p1,_) = B.splitAt lenChunk remainingPreface
if p1 == chunk
then await >>= go (ix + lenChunk)
else do
let ixDifferentByte = findDifferentByte p1 chunk
return (Left (ixDifferentByte + ix,B.drop ixDifferentByte chunk))
else do
let (c1,c2) = B.splitAt lenRemainingPreface chunk
if c1 == remainingPreface
then return (Right c2)
else do
let ixDifferentByte = findDifferentByte c1 remainingPreface
return (Left (ixDifferentByte + ix,B.drop ixDifferentByte chunk))
where
remainingPreface = B.drop ix preface
lenRemainingPreface = B.length remainingPreface
lenChunk = B.length chunk
consumeDropWhileLeftovers :: Monad m => ByteString -> (Char -> Bool) -> Consumer' ByteString m ByteString
consumeDropWhileLeftovers leftovers0 predicate = go leftovers0
where
go :: Monad m => ByteString -> Consumer' ByteString m ByteString
go chunk = do
let remaining = BC.dropWhile predicate chunk
if B.null remaining
then await >>= go
else return remaining
findDifferentByte :: ByteString -> ByteString -> Int
findDifferentByte a b = fromMaybe (-10000) (L.elemIndex False (B.zipWith (==) a b))
takeStrictLeftovers :: Monad m => ByteString -> Int -> Consumer' ByteString m (ByteString,ByteString)
takeStrictLeftovers leftovers0 total = if total < 1
then return (B.empty,leftovers0)
else go 0 mempty leftovers0
where
go :: Monad m => Int -> Builder -> ByteString -> Consumer' ByteString m (ByteString,ByteString)
go i1 bb bs = do
let i2 = i1 + B.length bs
if i2 < total
then await >>= go i2 (bb <> BB.byteString bs)
else do
let (a,b) = B.splitAt (B.length bs - (i2 - total)) bs
return (LB.toStrict $ BB.toLazyByteString $ bb <> BB.byteString a, b)
intToInt64 :: Int -> Int64
intToInt64 = fromIntegral