-----------------------------------------------------------------------------
-- |
-- Module      :  Data.Attoparsec.Split
-- Copyright   :  2010 Suite Solutions Ltd., Israel
--
-- Maintainer  :  Yitz Gale <yitzg@suite-sol.com>
-- Portability :  portable
--
-- Split a lazy bytestring into a lazy list of lazy bytestrings at
-- boundaries defined by an attoparsec parser. The result of
-- a matching parse is included at the beginning of the
-- lazy bytestring which begins at that point.

{- Copyright (c) 2010 Suite Solutions Ltd., Israel. All rights reserved.
For licensing information, see the BSD3-style license in the file
LICENSE that was originally distributed by the author together with
this file. -}

module Data.Attoparsec.Split (split) where

import Data.Attoparsec (Parser, Result, IResult(..), parse)
import Data.ByteString.Lazy.Internal (ByteString(..), chunk)
import qualified Data.ByteString.Lazy as Lazy
import qualified Data.ByteString as B
import qualified Data.ByteString as Strict
import Data.Maybe (fromMaybe, isJust)

-- The result of examining the next group of bytes in the input stream
data SplitResult =
    -- A match of the parser, ending the current lazy bytestring
    Match !B.ByteString -- The result of the match, to be used as
                        -- the prefix for the next component
          !ByteString   -- The rest of the input after the match
          !Bool         -- Did the match consume any input?
  | SplitChunk !B.ByteString -- A chunk of the current lazy bytestring
  deriving Show

isChunk :: SplitResult -> Bool
isChunk (SplitChunk _) = True
isChunk _              = False

asChunk :: SplitResult -> B.ByteString
asChunk (SplitChunk c) = c
asChunk _              = B.empty

-- | Split a lazy bytestring at boundaries defined by an attoparsec parser.
split :: Parser Strict.ByteString -> Lazy.ByteString -> [Lazy.ByteString]
split bdry = firstSplit . splitOne True bdry
  where
    firstSplit []                       = []
    firstSplit result@(Match _ _ _ : _) = continue result
    firstSplit result                   = nextSplit B.empty result

    nextSplit pfx result =
      chunk pfx (foldr (chunk . asChunk) Empty result) :
      continue (dropWhile isChunk result)

    continue (Match pfx xs bump : _) = nextSplit pfx (splitOne bump bdry xs)
    continue _                       = []

-- Split off the chunks of the first lazy bytestring from the input.
-- If the previous parser match did not consume any input, pass the
-- first byte through and only start looking for further parser matches
-- at the second byte.
-- A Match element will only occur as the last element of the result list.
-- If the last element is not a Match, end of input was reached.
splitOne :: Bool -> (Parser B.ByteString) -> ByteString -> [SplitResult]
splitOne _    _    Empty        = []
splitOne bump bdry (Chunk x xs)
 | bump                         = go 0 Nothing . parse bdry $ x
 | otherwise                    = go 1 Nothing . parse bdry $ B.tail x
  where
    go n _ (Fail _ _ _)
     | n < B.length x     = let n' = n + 1
                            in go n' Nothing . parse bdry $ B.drop n' x
     | otherwise          = SplitChunk x : splitOne True bdry xs
    go n ys (Done x' pfx) = [SplitChunk $ B.take n x,
                             Match pfx (chunk x' $ fromMaybe xs ys) $
                               isJust ys || B.length x' < B.length x - n]
    go n ys (Partial k)   = goPartial n (fromMaybe xs ys) k

    goPartial n (Chunk y ys) k = go n (Just ys) $ k y
    goPartial n _            _ = go n Nothing $ Fail B.empty [] ""