module Text.HTML.TagStream.Parser where
import Prelude hiding (takeWhile)
import Control.Applicative hiding (many)
import Data.Attoparsec.Char8
import Data.ByteString (ByteString)
import qualified Data.ByteString.Char8 as S
import Text.HTML.TagStream.Types
(||.) :: Applicative f => f Bool -> f Bool -> f Bool
(||.) = liftA2 (||)
value :: Parser ByteString
value = char '"' *> str
<|> takeTill (inClass ">=" ||. isSpace)
where
str = S.append <$> takeTill (inClass "\\\"")
<*> (end <|> unescape)
end = char '"' *> return ""
unescape = char '\\' *>
(S.cons <$> anyChar <*> str)
attr :: Parser Attr
attr = do
skipSpace
c <- satisfy (notInClass "/>")
name' <- takeTill (inClass ">=" ||. isSpace)
let name = S.cons c name'
skipSpace
option (name, S.empty) $ do
_ <- char '='
skipSpace
(name,) <$> value
attrs :: Parser [Attr]
attrs = many attr
comment :: Parser ByteString
comment = S.append <$>
takeTill (=='-') <*>
( string "-->" *> return "" <|>
S.cons <$> anyChar <*> comment )
special :: Parser Token
special = Comment <$> ( string "--" *> comment )
<|> Special
<$> ( S.cons
<$> satisfy (not . ((=='-') ||. isSpace))
<*> takeTill ((=='>') ||. isSpace)
<* skipSpace )
<*> takeTill (=='>')
<* char '>'
tag :: Parser Token
tag = string "<!" *> special
<|> string "</"
*> (TagClose <$> takeTill (=='>'))
<* char '>'
<|> char '<'
*> ( TagOpen
<$> ( S.cons
<$> satisfy (not . (isSpace ||. (inClass "!>")))
<*> takeTill (inClass "/>" ||. isSpace) )
<*> attrs <* skipSpace
<*> ( char '>' *> return False
<|> string "/>" *> return True ) )
text :: Parser Token
text = Text <$> (
S.cons <$> anyChar <*> takeTill (=='<')
)
token :: Parser Token
token = tag <|> text
html :: Parser [Token]
html = many token
decode :: ByteString -> Either String [Token]
decode = parseOnly html