----------------------------------------------------------------------------- -- | -- Module : Data.Attoparsec.Split -- Copyright : 2010 Suite Solutions Ltd., Israel -- -- Maintainer : Yitz Gale -- Portability : portable -- -- Split a lazy bytestring into a lazy list of lazy bytestrings at -- boundaries defined by an attoparsec parser. The result of -- a matching parse is included at the beginning of the -- lazy bytestring which begins at that point. {- Copyright (c) 2010 Suite Solutions Ltd., Israel. All rights reserved. For licensing information, see the BSD3-style license in the file LICENSE that was originally distributed by the author together with this file. -} module Data.Attoparsec.Split (split) where import Data.Attoparsec (Parser, Result(..), parse) import Data.ByteString.Lazy.Internal (ByteString(..), chunk) import qualified Data.ByteString.Lazy as Lazy import qualified Data.ByteString as B import qualified Data.ByteString as Strict import Data.Maybe (fromMaybe, isJust) -- The result of examining the next group of bytes in the input stream data SplitResult = -- A match of the parser, ending the current lazy bytestring Match !B.ByteString -- The result of the match, to be used as -- the prefix for the next component !ByteString -- The rest of the input after the match !Bool -- Did the match consume any input? | SplitChunk !B.ByteString -- A chunk of the current lazy bytestring deriving Show isChunk :: SplitResult -> Bool isChunk (SplitChunk _) = True isChunk _ = False asChunk :: SplitResult -> B.ByteString asChunk (SplitChunk c) = c asChunk _ = B.empty -- | Split a lazy bytestring at boundaries defined by an attoparsec parser. split :: Parser Strict.ByteString -> Lazy.ByteString -> [Lazy.ByteString] split bdry = firstSplit . splitOne True bdry where firstSplit [] = [] firstSplit result@(Match _ _ _ : _) = continue result firstSplit result = nextSplit B.empty result nextSplit pfx result = chunk pfx (foldr (chunk . asChunk) Empty result) : continue (dropWhile isChunk result) continue (Match pfx xs bump : _) = nextSplit pfx (splitOne bump bdry xs) continue _ = [] -- Split off the chunks of the first lazy bytestring from the input. -- If the previous parser match did not consume any input, pass the -- first byte through and only start looking for further parser matches -- at the second byte. -- A Match element will only occur as the last element of the result list. -- If the last element is not a Match, end of input was reached. splitOne :: Bool -> (Parser B.ByteString) -> ByteString -> [SplitResult] splitOne _ _ Empty = [] splitOne bump bdry (Chunk x xs) | bump = go 0 Nothing . parse bdry $ x | otherwise = go 1 Nothing . parse bdry $ B.tail x where go n _ (Fail _ _ _) | n < B.length x = let n' = n + 1 in go n' Nothing . parse bdry $ B.drop n' x | otherwise = SplitChunk x : splitOne True bdry xs go n ys (Done x' pfx) = [SplitChunk $ B.take n x, Match pfx (chunk x' $ fromMaybe xs ys) $ isJust ys || B.length x' < B.length x - n] go n ys (Partial k) = goPartial n (fromMaybe xs ys) k goPartial n (Chunk y ys) k = go n (Just ys) $ k y goPartial n _ _ = go n Nothing $ Fail B.empty [] ""