module Text.HTML.TagStream.Stream where
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import Data.Attoparsec.ByteString (parseOnly)
import Data.Conduit
import Text.HTML.TagStream.Parser
import Text.HTML.TagStream.Types
import Control.Monad (unless)
tokenStream :: Monad m => GInfConduit ByteString m Token
tokenStream =
loop S.empty
where
loop accum = awaitE >>= either (close accum) (push accum)
push accum input =
case parseOnly html (accum `S.append` input) of
Right (splitAccum -> (accum', tokens)) -> mapM_ yield tokens >> loop accum'
Left err -> fail err
close s r = do
unless (S.null s) $ yield $ Text s
return r
splitAccum :: [Token] -> (ByteString, [Token])
splitAccum [] = (S.empty, [])
splitAccum (reverse -> (Incomplete s : xs)) = (s, reverse xs)
splitAccum tokens = (S.empty, tokens)