{-# LANGUAGE OverloadedStrings, TupleSections #-} module Text.HTML.TagStream.Parser where import Prelude hiding (takeWhile) import Control.Applicative hiding (many) import Data.Attoparsec.Char8 import Data.ByteString (ByteString) import qualified Data.ByteString.Char8 as S import Text.HTML.TagStream.Types (||.) :: Applicative f => f Bool -> f Bool -> f Bool (||.) = liftA2 (||) -- (&&.) = liftA2 (&&) value :: Parser ByteString value = char '"' *> str <|> takeTill (inClass ">=" ||. isSpace) where str = S.append <$> takeTill (inClass "\\\"") <*> (end <|> unescape) end = char '"' *> return "" unescape = char '\\' *> (S.cons <$> anyChar <*> str) attr :: Parser Attr attr = do skipSpace c <- satisfy (notInClass "/>") name' <- takeTill (inClass ">=" ||. isSpace) let name = S.cons c name' skipSpace option (name, S.empty) $ do _ <- char '=' skipSpace (name,) <$> value attrs :: Parser [Attr] attrs = many attr comment :: Parser ByteString comment = S.append <$> takeTill (=='-') <*> ( string "-->" *> return "" <|> S.cons <$> anyChar <*> comment ) special :: Parser Token special = Comment <$> ( string "--" *> comment ) <|> Special <$> ( S.cons <$> satisfy (not . ((=='-') ||. isSpace)) <*> takeTill ((=='>') ||. isSpace) <* skipSpace ) <*> takeTill (=='>') <* char '>' tag :: Parser Token tag = string " special <|> string " (TagClose <$> takeTill (=='>')) <* char '>' <|> char '<' *> ( TagOpen <$> ( S.cons <$> satisfy (not . (isSpace ||. (inClass "!>"))) <*> takeTill (inClass "/>" ||. isSpace) ) <*> attrs <* skipSpace <*> ( char '>' *> return False <|> string "/>" *> return True ) ) text :: Parser Token text = Text <$> ( S.cons <$> anyChar <*> takeTill (=='<') ) token :: Parser Token token = tag <|> text html :: Parser [Token] html = many token decode :: ByteString -> Either String [Token] decode = parseOnly html