{-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} module HaskellWorks.Data.Xml.Blank ( blankXml ) where import Data.ByteString (ByteString) import Data.Word import Data.Word8 import HaskellWorks.Data.Xml.Internal.Words import Prelude import qualified Data.ByteString as BS type ExpectedChar = Word8 data BlankState = InXml | InTag | InAttrList | InCloseTag | InClose | InBang Int | InString ExpectedChar | InText | InMeta | InCdataTag | InCdata Int | InRem Int | InIdent deriving (Eq, Show) data ByteStringP = BSP Word8 ByteString | EmptyBSP deriving Show blankXml :: BS.ByteString -> BS.ByteString blankXml as = fst (BS.unfoldrN (BS.length as) go (InXml, as)) where go :: (BlankState, ByteString) -> Maybe (Word8, (BlankState, ByteString)) go (InXml, bs) = case BS.uncons bs of Just (!c, !cs) | isMetaStart c cs -> Just (_bracketleft , (InMeta , cs)) Just (!c, !cs) | isEndTag c cs -> Just (_space , (InCloseTag , cs)) Just (!c, !cs) | isTextStart c -> Just (_t , (InText , cs)) Just (!c, !cs) | c == _less -> Just (_less , (InTag , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InXml , cs)) Just ( _, !cs) -> Just (_space , (InXml , cs)) Nothing -> Nothing go (InTag, bs) = case BS.uncons bs of Just (!c, !cs) | isSpace c -> Just (_parenleft , (InAttrList , cs)) Just (!c, !cs) | isTagClose c cs -> Just (_space , (InClose , cs)) Just (!c, !cs) | c == _greater -> Just (_space , (InXml , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InTag , cs)) Just ( _, !cs) -> Just (_space , (InTag , cs)) Nothing -> Nothing go (InCloseTag, bs) = case BS.uncons bs of Just (!c, !cs) | c == _greater -> Just (_greater , (InXml , cs)) Just ( _, !cs) -> Just (_space , (InCloseTag , cs)) Nothing -> Nothing go (InAttrList, bs) = case BS.uncons bs of Just (!c, !cs) | c == _greater -> Just (_parenright , (InXml , cs)) Just (!c, !cs) | isTagClose c cs -> Just (_parenright , (InClose , cs)) Just (!c, !cs) | isNameStartChar c -> Just (_a , (InIdent , cs)) Just (!c, !cs) | isQuote c -> Just (_v , (InString c , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InAttrList , cs)) Just ( _, !cs) -> Just (_space , (InAttrList , cs)) Nothing -> Nothing go (InClose, bs) = case BS.uncons bs of Just (_, !cs) -> Just (_greater , (InXml , cs)) Nothing -> Nothing go (InIdent, bs) = case BS.uncons bs of Just (!c, !cs) | isNameChar c -> Just (_space , (InIdent , cs)) Just (!c, !cs) | isSpace c -> Just (_space , (InAttrList , cs)) Just (!c, !cs) | c == _equal -> Just (_space , (InAttrList , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InAttrList , cs)) Just ( _, !cs) -> Just (_space , (InAttrList , cs)) Nothing -> Nothing go (InString q, bs) = case BS.uncons bs of Just (!c, !cs) | c == q -> Just (_space , (InAttrList , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InString q , cs)) Just ( _, !cs) -> Just (_space , (InString q , cs)) Nothing -> Nothing go (InText, bs) = case BS.uncons bs of Just (!c, !cs) | isEndTag c cs -> Just (_space , (InCloseTag , cs)) Just ( _, !cs) | headIs (== _less) cs -> Just (_space , (InXml , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InText , cs)) Just ( _, !cs) -> Just (_space , (InText , cs)) Nothing -> Nothing go (InMeta, bs) = case BS.uncons bs of Just (!c, !cs) | c == _exclam -> Just (_space , (InMeta , cs)) Just (!c, !cs) | c == _hyphen -> Just (_space , (InRem 0 , cs)) Just (!c, !cs) | c == _bracketleft -> Just (_space , (InCdataTag , cs)) Just (!c, !cs) | c == _greater -> Just (_bracketright, (InXml , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InBang 1 , cs)) Just ( _, !cs) -> Just (_space , (InBang 1 , cs)) Nothing -> Nothing go (InCdataTag, bs) = case BS.uncons bs of Just (!c, !cs) | c == _bracketleft -> Just (_space , (InCdata 0 , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InCdataTag , cs)) Just ( _, !cs) -> Just (_space , (InCdataTag , cs)) Nothing -> Nothing go (InCdata n, bs) = case BS.uncons bs of Just (!c, !cs) | c == _greater && n >= 2 -> Just (_bracketright, (InXml , cs)) Just (!c, !cs) | isCdataEnd c cs && n > 0 -> Just (_space , (InCdata (n+1), cs)) Just (!c, !cs) | c == _bracketright -> Just (_space , (InCdata (n+1), cs)) Just (!c, !cs) | isSpace c -> Just (c , (InCdata 0 , cs)) Just ( _, !cs) -> Just (_space , (InCdata 0 , cs)) Nothing -> Nothing go (InRem n, bs) = case BS.uncons bs of Just (!c, !cs) | c == _greater && n >= 2 -> Just (_bracketright, (InXml , cs)) Just (!c, !cs) | c == _hyphen -> Just (_space , (InRem (n+1) , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InRem 0 , cs)) Just ( _, !cs) -> Just (_space , (InRem 0 , cs)) Nothing -> Nothing go (InBang n, bs) = case BS.uncons bs of Just (!c, !cs) | c == _less -> Just (_bracketleft , (InBang (n+1) , cs)) Just (!c, !cs) | c == _greater && n == 1 -> Just (_bracketright, (InXml , cs)) Just (!c, !cs) | c == _greater -> Just (_bracketright, (InBang (n-1) , cs)) Just (!c, !cs) | isSpace c -> Just (c , (InBang n , cs)) Just ( _, !cs) -> Just (_space , (InBang n , cs)) Nothing -> Nothing isEndTag :: Word8 -> ByteString -> Bool isEndTag c cs = c == _less && headIs (== _slash) cs {-# INLINE isEndTag #-} isTagClose :: Word8 -> ByteString -> Bool isTagClose c cs = (c == _slash) || ((c == _slash || c == _question) && headIs (== _greater) cs) {-# INLINE isTagClose #-} isMetaStart :: Word8 -> ByteString -> Bool isMetaStart c cs = c == _less && headIs (== _exclam) cs {-# INLINE isMetaStart #-} isCdataEnd :: Word8 -> ByteString -> Bool isCdataEnd c cs = c == _bracketright && headIs (== _greater) cs {-# INLINE isCdataEnd #-} headIs :: (Word8 -> Bool) -> ByteString -> Bool headIs p bs = case BS.uncons bs of Just (!c, _) -> p c Nothing -> False {-# INLINE headIs #-}