-- | Split incombing bytestrings based on bytestrings.

module Pipes.Split.ByteString where

import           Control.Monad (join,unless)
import           Control.Monad.Trans.Class (lift)
import           Data.ByteString (ByteString)
import           Data.ByteString.Search (indices)
import           Data.Monoid ((<>))
import           Debug.Trace
import           Pipes (Producer,next,yield)
import qualified Data.ByteString as BS



type Lens' a b = forall f . Functor f => (b -> f b) -> (a -> f a)

-- | Splits bytestrings after each pattern @pat@. Tries to minimize the
-- number of intermediate bytestring constructors.
--
-- The following function @ske@ expects a string @str@ and a pattern @pat@
-- and then returns a tuple with the splitted bytestrings in @fst@ and the
-- return value in @snd@.
--
-- The inner parser @parse@ uses @zoom@ to draw the full inner producer,
-- which should contain just one bytestring, namely one of the split off
-- ones. @parse@ doesn't do anything with the inner producer, except
-- returning the contained bytestring.
--
-- @parse@ returns @Right $ concat xs@ on a correct parse, and @Left []@
-- once the input has been exhausted.
--
-- @
-- ske :: ByteString -> ByteString -> ([ByteString],[ByteString],[ByteString])
-- ske pat str | BS.null pat || BS.null str = ([],[],[])
-- ske pat str =
--   let parse = do
--         xs <- zoom (splitKeepEnd pat) PP.drawAll
--         case xs of
--           [] -> return $ Left []
--           xs -> return $ Right $ BS.concat xs
--       (a,(b,p)) = runIdentity . P.toListM' $ PP.parsed parse $ PP.yield str
--   in (a,b, fst . runIdentity . P.toListM' $ p)
-- @

splitKeepEnd :: Monad m => ByteString -> Lens' (Producer ByteString m x) (Producer ByteString m (Producer ByteString m x))
splitKeepEnd :: ByteString
-> Lens'
     (Producer ByteString m x)
     (Producer ByteString m (Producer ByteString m x))
splitKeepEnd ByteString
pat Producer ByteString m (Producer ByteString m x)
-> f (Producer ByteString m (Producer ByteString m x))
k Producer ByteString m x
p0 = (Producer ByteString m (Producer ByteString m x)
 -> Producer ByteString m x)
-> f (Producer ByteString m (Producer ByteString m x))
-> f (Producer ByteString m x)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Producer ByteString m (Producer ByteString m x)
-> Producer ByteString m x
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Producer ByteString m (Producer ByteString m x)
-> f (Producer ByteString m (Producer ByteString m x))
k (ByteString
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
go ByteString
BS.empty Producer ByteString m x
p0)) where
  go :: ByteString
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
go ByteString
pre Producer ByteString m x
p = do
    Either x (ByteString, Producer ByteString m x)
x <- m (Either x (ByteString, Producer ByteString m x))
-> Proxy
     X
     ()
     ()
     ByteString
     m
     (Either x (ByteString, Producer ByteString m x))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer ByteString m x
-> m (Either x (ByteString, Producer ByteString m x))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m x
p)
    case Either x (ByteString, Producer ByteString m x)
x of
      Left x
r -> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer ByteString m x
 -> Producer ByteString m (Producer ByteString m x))
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
forall a b. (a -> b) -> a -> b
$ x -> Producer ByteString m x
forall (m :: * -> *) a. Monad m => a -> m a
return x
r
      Right (ByteString
bs, Producer ByteString m x
p') -> do
        case ByteString -> [Int]
fnd (ByteString
pre ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
bs) of
          -- no hit yet, send the bs down, keep some suffix
          [] -> do
            Bool
-> Proxy X () () ByteString m () -> Proxy X () () ByteString m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
BS.null ByteString
bs) (ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
bs)
            let pfx :: ByteString
pfx = Int -> ByteString -> ByteString
BS.drop (ByteString -> Int
BS.length ByteString
bs Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) ByteString
bs
            ByteString
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
go ByteString
pfx Producer ByteString m x
p'
          -- at least one hit, split off the correct part, remainder goes
          -- back.
          (Int
k:[Int]
_) -> do
            let (ByteString
y,ByteString
suf) = Int -> ByteString -> (ByteString, ByteString)
BS.splitAt (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- ByteString -> Int
BS.length ByteString
pre Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
l) ByteString
bs
            ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
y
            Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
suf Proxy X () () ByteString m ()
-> Producer ByteString m x -> Producer ByteString m x
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Producer ByteString m x
p')
  l :: Int
l = ByteString -> Int
BS.length ByteString
pat
  fnd :: ByteString -> [Int]
fnd = ByteString -> ByteString -> [Int]
indices ByteString
pat
{-# Inlineable splitKeepEnd #-}



-- | Split a string into substrings, where each substring starts with @pat@
-- and continues until just before the next @pat@ (or until there is no
-- more input).
--
-- Any prefix that does not start with the substring is /kept/!
--
-- Since each substring is supposed to start with @pat@, there is a small
-- problem. What about a header that prefixes the string we are interested
-- in?

splitKeepStart :: Monad m => ByteString -> Lens' (Producer ByteString m x) (Producer ByteString m (Producer ByteString m x))
splitKeepStart :: ByteString
-> Lens'
     (Producer ByteString m x)
     (Producer ByteString m (Producer ByteString m x))
splitKeepStart = (ByteString -> Int -> Int -> Int -> (ByteString, ByteString))
-> ByteString
-> Lens'
     (Producer ByteString m x)
     (Producer ByteString m (Producer ByteString m x))
forall (m :: * -> *) x.
Monad m =>
(ByteString -> Int -> Int -> Int -> (ByteString, ByteString))
-> ByteString
-> Lens'
     (Producer ByteString m x)
     (Producer ByteString m (Producer ByteString m x))
splitGeneric (\ByteString
bs Int
k Int
p Int
l -> Int -> ByteString -> (ByteString, ByteString)
BS.splitAt (Int
k Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
p) ByteString
bs)
{-# Inlineable splitKeepStart #-}



-- | Generic splitting function. Takes a bytestring @[a,b,c]@ (where
-- @a,b,c@ are substrings of the bytestring!) and performs the split.
--

splitGeneric
  :: Monad m
  => (ByteString -> Int -> Int -> Int -> (ByteString,ByteString))
  -- ^ splitter function
  -> ByteString
  -- ^ pattern to split on
  -> Lens' (Producer ByteString m x) (Producer ByteString m (Producer ByteString m x))
  -- ^ lens into the individual split off bytestrings
splitGeneric :: (ByteString -> Int -> Int -> Int -> (ByteString, ByteString))
-> ByteString
-> Lens'
     (Producer ByteString m x)
     (Producer ByteString m (Producer ByteString m x))
splitGeneric ByteString -> Int -> Int -> Int -> (ByteString, ByteString)
splt ByteString
pat Producer ByteString m (Producer ByteString m x)
-> f (Producer ByteString m (Producer ByteString m x))
k Producer ByteString m x
p0 = (Producer ByteString m (Producer ByteString m x)
 -> Producer ByteString m x)
-> f (Producer ByteString m (Producer ByteString m x))
-> f (Producer ByteString m x)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Producer ByteString m (Producer ByteString m x)
-> Producer ByteString m x
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Producer ByteString m (Producer ByteString m x)
-> f (Producer ByteString m (Producer ByteString m x))
k (ByteString
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
go ByteString
BS.empty Producer ByteString m x
p0)) where
  go :: ByteString
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
go ByteString
pre Producer ByteString m x
p = do
    Either x (ByteString, Producer ByteString m x)
x <- m (Either x (ByteString, Producer ByteString m x))
-> Proxy
     X
     ()
     ()
     ByteString
     m
     (Either x (ByteString, Producer ByteString m x))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (Producer ByteString m x
-> m (Either x (ByteString, Producer ByteString m x))
forall (m :: * -> *) a r.
Monad m =>
Producer a m r -> m (Either r (a, Producer a m r))
next Producer ByteString m x
p)
    case Either x (ByteString, Producer ByteString m x)
x of
      Left x
r -> do
        -- yield final split off string
        Bool
-> Proxy X () () ByteString m () -> Proxy X () () ByteString m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Bool
BS.null ByteString
pre) (ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
pre)
        Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
forall (m :: * -> *) a. Monad m => a -> m a
return (Producer ByteString m x
 -> Producer ByteString m (Producer ByteString m x))
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
forall a b. (a -> b) -> a -> b
$ x -> Producer ByteString m x
forall (m :: * -> *) a. Monad m => a -> m a
return x
r
      Right (ByteString
bs, Producer ByteString m x
p') -> do
        -- will not search in the part of the prefix that *can not contain*
        -- the @pat@tern.
        case ByteString -> [Int]
fnd ((Int -> ByteString -> ByteString
BS.drop (ByteString -> Int
BS.length ByteString
pre Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
l) ByteString
pre) ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
bs) of
          -- no hit yet, send the prefix down completely, make bs new
          -- prefix if possible. If either @pre@ or @bs@ are too short, we
          -- keep @pre <> bs@ for the next round. This should not happen if
          -- the pattern is reasonably short compared to the size of the
          -- bytestring chunks.
          [] -> do
            if (ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l)
            then ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
pre Proxy X () () ByteString m ()
-> Producer ByteString m (Producer ByteString m x)
-> Producer ByteString m (Producer ByteString m x)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteString
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
go ByteString
bs Producer ByteString m x
p'
            else ByteString
-> Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
go (ByteString
pre ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
bs) Producer ByteString m x
p'
          -- at least one hit, split off the correct part, remainder goes
          -- back.
          (Int
k:[Int]
_) -> do
            let (ByteString
y,ByteString
suf) = ByteString -> Int -> Int -> Int -> (ByteString, ByteString)
splt ByteString
bs Int
k (ByteString -> Int
BS.length ByteString
pre) Int
l
            ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
y
            Producer ByteString m x
-> Producer ByteString m (Producer ByteString m x)
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Proxy X () () ByteString m ()
forall (m :: * -> *) a x' x. Functor m => a -> Proxy x' x () a m ()
yield ByteString
suf Proxy X () () ByteString m ()
-> Producer ByteString m x -> Producer ByteString m x
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Producer ByteString m x
p')
  l :: Int
l = ByteString -> Int
BS.length ByteString
pat
  fnd :: ByteString -> [Int]
fnd = ByteString -> ByteString -> [Int]
indices ByteString
pat
{-# Inline splitGeneric #-}



-- manual splitting, for @splitKeepEnd@

referenceByteStringTokenizer :: ByteString -> ByteString -> [ByteString]
referenceByteStringTokenizer ByteString
pat ByteString
str | ByteString -> Bool
BS.null ByteString
pat Bool -> Bool -> Bool
|| ByteString -> Bool
BS.null ByteString
str = []
referenceByteStringTokenizer ByteString
pat ByteString
str
  = (ByteString
h ByteString -> ByteString -> ByteString
`BS.append` Int -> ByteString -> ByteString
BS.take (ByteString -> Int
BS.length ByteString
pat) ByteString
t)
  ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: if ByteString -> Bool
BS.null ByteString
t then [] else ByteString -> ByteString -> [ByteString]
referenceByteStringTokenizer ByteString
pat (Int -> ByteString -> ByteString
BS.drop (ByteString -> Int
BS.length ByteString
pat) ByteString
t)
    where (ByteString
h,ByteString
t) = ByteString -> ByteString -> (ByteString, ByteString)
BS.breakSubstring ByteString
pat ByteString
str