module Text.HTML.TagSoup.Parsec
( module Text.HTML.TagSoup
, TagParser
, TagParserSt
, WholeTag
, tParse
, tStParse
, openTag
, maybeOpenTag
, eitherOpenTag
, notOpenTag
, allOpenTags
, wholeTag
, maybeWholeTag
, eitherWholeTag
, allWholeTags
, closeTag
, maybeCloseTag
, eitherCloseTag
, notCloseTag
, allCloseTags
, maybeP
, allP
, eitherP
)
where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup
import Data.Maybe
import Data.Char
type WholeTag =
( Tag , [ Tag ] , Tag )
type TagParser =
GenParser Tag ( )
type TagParserSt =
GenParser Tag
tParse :: TagParser a -> [ Tag ] -> a
tParse p ts =
either ( error . show ) id $ parse p "tagsoup" ts
tStParse :: TagParserSt st a -> st -> [ Tag ] -> a
tStParse p state tos =
either ( error . show ) id $ runParser p state "tagsoup" tos
tagEater matcher =
tokenPrim show
( \ oldSp _ _ -> do
setSourceLine oldSp ( 1 + sourceLine oldSp )
)
matcher
lowercase =
map toLower
openTag :: String -> TagParserSt st Tag
openTag soughtName =
openTagMatch soughtName ( Just ) $ \ _ -> Nothing
notOpenTag :: String -> TagParserSt st Tag
notOpenTag avoidName =
openTagMatch avoidName ( \ _ -> Nothing ) Just
openTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagOpen tname atrs ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
closeTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagClose tname ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
wholeTag :: String -> TagParserSt st WholeTag
wholeTag soughtName = do
open <- openTag soughtName
ts <- many $ notCloseTag soughtName
close <- closeTag soughtName
return ( open , ts , close )
closeTag :: String -> TagParserSt st Tag
closeTag soughtName =
closeTagMatch soughtName ( Just ) $ \ _ -> Nothing
notCloseTag :: String -> TagParserSt st Tag
notCloseTag avoidName =
closeTagMatch avoidName ( \ _ -> Nothing ) Just
maybeOpenTag :: String -> TagParserSt st ( Maybe Tag )
maybeOpenTag =
maybeP . openTag
maybeCloseTag :: String -> TagParserSt st ( Maybe Tag )
maybeCloseTag =
maybeP . closeTag
maybeWholeTag :: String -> TagParserSt st ( Maybe WholeTag )
maybeWholeTag =
maybeP . wholeTag
allOpenTags :: String -> TagParserSt st [ Tag ]
allOpenTags =
allP . maybeOpenTag
allCloseTags :: String -> TagParserSt st [ Tag ]
allCloseTags =
allP . maybeCloseTag
allWholeTags :: String -> TagParserSt st [ WholeTag ]
allWholeTags =
allP . maybeWholeTag
eitherP :: Show tok => GenParser tok st a -> GenParser tok st ( Either tok a )
eitherP p = do
try ( do t <- p
return $ Right t
) <|> ( do t <- anyToken
return $ Left t
)
eitherOpenTag :: String -> TagParserSt st ( Either Tag Tag )
eitherOpenTag =
eitherP . openTag
eitherCloseTag :: String -> TagParserSt st ( Either Tag Tag )
eitherCloseTag =
eitherP . closeTag
eitherWholeTag :: String -> TagParserSt st ( Either Tag WholeTag )
eitherWholeTag =
eitherP . wholeTag
allP :: GenParser tok st ( Maybe a ) -> GenParser tok st [ a ]
allP p = do
ts <- many p
let ls =
catMaybes ts
return ls
maybeP :: Show tok => GenParser tok st a -> GenParser tok st ( Maybe a )
maybeP p =
try ( do t <- p
return $ Just t
) <|> ( do anyToken
return Nothing
)