{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} module HaskellWorks.Data.Xml.Conduit.Blank ( blankXml ) where import Control.Monad import Control.Monad.Trans.Resource (MonadThrow) import Data.ByteString as BS import Data.Conduit import Data.Word import Data.Word8 import HaskellWorks.Data.Xml.Conduit.Words import Prelude as P type ExpectedChar = Word8 data BlankState = InXml | InTag | InAttrList | InCloseTag | InClose | InBang Int | InString ExpectedChar | InText | InMeta | InCdataTag | InCdata Int | InRem Int | InIdent data ByteStringP = BSP Word8 ByteString | EmptyBSP blankXml :: MonadThrow m => Conduit BS.ByteString m BS.ByteString blankXml = blankXml' Nothing InXml blankXml' :: MonadThrow m => Maybe Word8 -> BlankState -> Conduit BS.ByteString m BS.ByteString blankXml' lastChar lastState = do mbs <- await case prefix lastChar mbs of Just bsp -> do let (safe, next) = unsnocUndecided bsp let (!cs, Just (!nextState, _)) = unfoldrN (lenBSP safe) blankByteString (lastState, safe) yield cs blankXml' next nextState Nothing -> return () where blankByteString :: (BlankState, ByteStringP) -> Maybe (Word8, (BlankState, ByteStringP)) blankByteString (InXml, bs) = case bs of BSP !c !cs | isMetaStart c cs -> Just (_bracketleft , (InMeta , toBSP cs)) BSP !c !cs | isEndTag c cs -> Just (_space , (InCloseTag, toBSP cs)) BSP !c !cs | isTextStart c -> Just (_t , (InText , toBSP cs)) BSP !c !cs | c == _less -> Just (_less , (InTag , toBSP cs)) BSP _ !cs -> Just (_space , (InXml , toBSP cs)) EmptyBSP -> Nothing blankByteString (InTag, bs) = case bs of BSP !c !cs | isSpace c -> Just (_parenleft , (InAttrList, toBSP cs)) BSP !c !cs | isTagClose c cs -> Just (_space , (InClose , toBSP cs)) BSP !c !cs | c == _greater -> Just (_space , (InXml , toBSP cs)) BSP _ !cs -> Just (_space , (InTag , toBSP cs)) EmptyBSP -> Nothing blankByteString (InCloseTag, bs) = case bs of BSP !c !cs | c == _greater -> Just (_greater , (InXml , toBSP cs)) BSP _ !cs -> Just (_space , (InCloseTag, toBSP cs)) EmptyBSP -> Nothing blankByteString (InAttrList, bs) = case bs of BSP !c !cs | c == _greater -> Just (_parenright , (InXml , toBSP cs)) BSP !c !cs | isTagClose c cs -> Just (_parenright , (InClose , toBSP cs)) BSP !c !cs | isNameStartChar c -> Just (_a , (InIdent , toBSP cs)) BSP !c !cs | isQuote c -> Just (_v , (InString c, toBSP cs)) BSP _ !cs -> Just (_space , (InAttrList, toBSP cs)) EmptyBSP -> Nothing blankByteString (InClose, bs) = case bs of BSP _ !cs -> Just (_greater , (InXml , toBSP cs)) EmptyBSP -> Nothing blankByteString (InIdent, bs) = case bs of BSP !c !cs | isNameChar c -> Just (_space , (InIdent , toBSP cs)) BSP !c !cs | isSpace c -> Just (_space , (InAttrList, toBSP cs)) BSP !c !cs | c == _equal -> Just (_space , (InAttrList, toBSP cs)) BSP _ !cs -> Just (_space , (InAttrList, toBSP cs)) EmptyBSP -> Nothing blankByteString (InString q, bs) = case bs of BSP !c !cs | c == q -> Just (_space , (InAttrList, toBSP cs)) BSP _ !cs -> Just (_space , (InString q, toBSP cs)) EmptyBSP -> Nothing blankByteString (InText, bs) = case bs of BSP !c !cs | isEndTag c cs -> Just (_space , (InCloseTag, toBSP cs)) BSP _ !cs | headIs (== _less) cs -> Just (_space , (InXml , toBSP cs)) BSP _ !cs -> Just (_space , (InText , toBSP cs)) EmptyBSP -> Nothing blankByteString (InMeta, bs) = case bs of BSP !c !cs | c == _exclam -> Just (_space , (InMeta , toBSP cs)) BSP !c !cs | c == _hyphen -> Just (_space , (InRem 0 , toBSP cs)) BSP !c !cs | c == _bracketleft -> Just (_space , (InCdataTag , toBSP cs)) BSP !c !cs | c == _greater -> Just (_bracketright, (InXml , toBSP cs)) BSP _ !cs -> Just (_space , (InBang 1 , toBSP cs)) EmptyBSP -> Nothing blankByteString (InCdataTag, bs) = case bs of BSP !c !cs | c == _bracketleft -> Just (_space , (InCdata 0 , toBSP cs)) BSP _ !cs -> Just (_space , (InCdataTag , toBSP cs)) EmptyBSP -> Nothing blankByteString (InCdata n, bs) = case bs of BSP !c !cs | c == _greater && n >= 2 -> Just (_bracketright, (InXml , toBSP cs)) BSP !c !cs | isCdataEnd c cs && n>0 -> Just (_space , (InCdata (n+1), toBSP cs)) BSP !c !cs | c == _bracketright -> Just (_space , (InCdata (n+1), toBSP cs)) BSP _ !cs -> Just (_space , (InCdata 0 , toBSP cs)) EmptyBSP -> Nothing blankByteString (InRem n, bs) = case bs of BSP !c !cs | c == _greater && n >= 2 -> Just (_bracketright, (InXml , toBSP cs)) BSP !c !cs | c == _hyphen -> Just (_space , (InRem (n+1) , toBSP cs)) BSP _ !cs -> Just (_space , (InRem 0 , toBSP cs)) EmptyBSP -> Nothing blankByteString (InBang n, bs) = case bs of BSP !c !cs | c == _less -> Just (_bracketleft , (InBang (n+1) , toBSP cs)) BSP !c !cs | c == _greater && n == 1 -> Just (_bracketright, (InXml , toBSP cs)) BSP !c !cs | c == _greater -> Just (_bracketright, (InBang (n-1) , toBSP cs)) BSP _ !cs -> Just (_space , (InBang n , toBSP cs)) EmptyBSP -> Nothing prefix :: Maybe Word8 -> Maybe ByteString -> Maybe ByteStringP prefix (Just s) (Just bs) = Just $ BSP s bs prefix (Just s) Nothing = Just $ BSP s BS.empty prefix Nothing (Just bs) = (\(!c, !cs) -> BSP c cs) <$> BS.uncons bs prefix Nothing Nothing = Nothing toBSP :: ByteString -> ByteStringP toBSP bs = case BS.uncons bs of Just (!c, !cs) -> BSP c cs Nothing -> EmptyBSP lenBSP :: ByteStringP -> Int lenBSP (BSP _ bs) = BS.length bs + 1 lenBSP EmptyBSP = 0 isEndTag :: Word8 -> ByteString -> Bool isEndTag c cs = c == _less && headIs (== _slash) cs -- isStartTag :: Word8 -> ByteString -> Bool -- isStartTag c cs = c == _less && headIs isNameStartChar cs isTagClose :: Word8 -> ByteString -> Bool isTagClose c cs = (c == _slash || c == _question) && headIs (== _greater) cs isMetaStart :: Word8 -> ByteString -> Bool isMetaStart c cs = c == _less && headIs (== _exclam) cs isCdataEnd :: Word8 -> ByteString -> Bool isCdataEnd c cs = c == _bracketright && headIs (== _greater) cs unsnocUndecided :: ByteStringP -> (ByteStringP, Maybe Word8) unsnocUndecided = unscnocIf (\w -> w ==_less -- or ? || w == _slash -- or not? || w == _hyphen -- closing comment or just - ? || w == _bracketright) -- closing CDATA or just data? {-# INLINE unsnocUndecided #-} headIs :: (Word8 -> Bool) -> ByteString -> Bool headIs p bs = case BS.uncons bs of Just (!c, _) -> p c Nothing -> False {-# INLINE headIs #-} unscnocIf :: (Word8 -> Bool) -> ByteStringP -> (ByteStringP, Maybe Word8) unscnocIf _ EmptyBSP = (EmptyBSP, Nothing) unscnocIf p (BSP !c !bs) = case BS.unsnoc bs of Just (bs', w) | p w -> (BSP c bs', Just w) _ -> (BSP c bs , Nothing) {-# INLINE unscnocIf #-}