module Text.HTML.TagSoup.Parsec
( module Text.HTML.TagSoup
, TagParser
, WholeTag
, tParse
, openTag
, maybeOpenTag
, eitherOpenTag
, notOpenTag
, allOpenTags
, wholeTag
, maybeWholeTag
, eitherWholeTag
, allWholeTags
, closeTag
, maybeCloseTag
, eitherCloseTag
, notCloseTag
, allCloseTags
, maybeP
, allP
, eitherP
)
where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup
import Data.Maybe
import Data.Char
type WholeTag =
( Tag , [ Tag ] , Tag )
type TagParser =
GenParser Tag ( )
tParse :: TagParser a -> [ Tag ] -> a
tParse p ts =
case parse p "tagsoup" ts of
Left err ->
error $ show err
Right crap ->
crap
tagEater matcher =
tokenPrim show
( \ oldSp _ _ -> do
setSourceLine oldSp ( 1 + sourceLine oldSp )
)
matcher
lowercase =
map toLower
openTag :: String -> TagParser Tag
openTag soughtName =
openTagMatch soughtName ( Just ) $ \ _ -> Nothing
notOpenTag :: String -> TagParser Tag
notOpenTag avoidName =
openTagMatch avoidName ( \ _ -> Nothing ) Just
openTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagOpen tname atrs ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
closeTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagClose tname ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
wholeTag :: String -> TagParser WholeTag
wholeTag soughtName = do
open <- openTag soughtName
ts <- many $ notCloseTag soughtName
close <- closeTag soughtName
return ( open , ts , close )
closeTag :: String -> TagParser Tag
closeTag soughtName =
closeTagMatch soughtName ( Just ) $ \ _ -> Nothing
notCloseTag :: String -> TagParser Tag
notCloseTag avoidName =
closeTagMatch avoidName ( \ _ -> Nothing ) Just
maybeOpenTag :: String -> TagParser ( Maybe Tag )
maybeOpenTag =
maybeP . openTag
maybeCloseTag :: String -> TagParser ( Maybe Tag )
maybeCloseTag =
maybeP . closeTag
maybeWholeTag :: String -> TagParser ( Maybe WholeTag )
maybeWholeTag =
maybeP . wholeTag
allOpenTags :: String -> TagParser [ Tag ]
allOpenTags =
allP . maybeOpenTag
allCloseTags :: String -> TagParser [ Tag ]
allCloseTags =
allP . maybeCloseTag
allWholeTags :: String -> TagParser [ WholeTag ]
allWholeTags =
allP . maybeWholeTag
eitherP :: Show tok => GenParser tok st a -> GenParser tok st ( Either tok a )
eitherP p = do
try ( do t <- p
return $ Right t
) <|> ( do t <- anyToken
return $ Left t
)
eitherOpenTag :: String -> TagParser ( Either Tag Tag )
eitherOpenTag =
eitherP . openTag
eitherCloseTag :: String -> TagParser ( Either Tag Tag )
eitherCloseTag =
eitherP . closeTag
eitherWholeTag :: String -> TagParser ( Either Tag WholeTag )
eitherWholeTag =
eitherP . wholeTag
allP :: GenParser tok st ( Maybe a ) -> GenParser tok st [ a ]
allP p = do
ts <- many p
let ls =
catMaybes ts
return ls
maybeP :: Show tok => GenParser tok st a -> GenParser tok st ( Maybe a )
maybeP p =
try ( do t <- p
return $ Just t
) <|> ( do anyToken
return Nothing
)