module Text.HTML.TagSoup.Parsec
( module Text.HTML.TagSoup
, TagParser
, WholeTag
, tParse
, openTag
, maybeOpenTag
, notOpenTag
, allOpenTags
, wholeTag
, maybeWholeTag
, allWholeTags
, closeTag
, maybeCloseTag
, notCloseTag
, allCloseTags
, maybeP
, allP
)
where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup
import Data.Maybe
import Data.Char
type WholeTag =
( Tag , [ Tag ] , Tag )
type TagParser =
GenParser Tag ( )
tParse :: TagParser a -> [ Tag ] -> a
tParse p ts =
case parse p "tagsoup" ts of
Left err ->
error $ show err
Right crap ->
crap
tagEater matcher =
tokenPrim show
( \ oldSp _ _ -> do
setSourceLine oldSp ( 1 + sourceLine oldSp )
)
matcher
lowercase =
map toLower
openTag :: String -> TagParser Tag
openTag soughtName =
openTagMatch soughtName ( Just ) $ \ _ -> Nothing
notOpenTag :: String -> TagParser Tag
notOpenTag avoidName =
openTagMatch avoidName ( \ _ -> Nothing ) Just
openTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagOpen tname atrs ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
closeTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagClose tname ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
wholeTag :: String -> TagParser WholeTag
wholeTag soughtName = do
open <- openTag soughtName
ts <- many $ notCloseTag soughtName
close <- closeTag soughtName
return ( open , ts , close )
closeTag :: String -> TagParser Tag
closeTag soughtName =
closeTagMatch soughtName ( Just ) $ \ _ -> Nothing
notCloseTag :: String -> TagParser Tag
notCloseTag avoidName =
closeTagMatch avoidName ( \ _ -> Nothing ) Just
maybeOpenTag :: String -> TagParser ( Maybe Tag )
maybeOpenTag soughtName =
maybeP $ openTag soughtName
maybeCloseTag :: String -> TagParser ( Maybe Tag )
maybeCloseTag soughtName =
maybeP $ closeTag soughtName
maybeWholeTag :: String -> TagParser ( Maybe WholeTag )
maybeWholeTag soughtName =
maybeP $ wholeTag soughtName
allOpenTags :: String -> TagParser [ Tag ]
allOpenTags t =
allP $ maybeOpenTag t
allCloseTags :: String -> TagParser [ Tag ]
allCloseTags t =
allP $ maybeCloseTag t
allWholeTags :: String -> TagParser [ WholeTag ]
allWholeTags t =
allP $ maybeWholeTag t
allP :: GenParser tok st ( Maybe a ) -> GenParser tok st [ a ]
allP p = do
ts <- many p
let ls =
catMaybes ts
return ls
maybeP :: Show tok => GenParser tok st a -> GenParser tok st ( Maybe a )
maybeP p =
try ( do t <- p
return $ Just t
) <|> ( do anyToken
return Nothing
)