module Text.HTML.TagSoup.Parsec
( module Text.HTML.TagSoup
, TagParser
, WholeTag
, tParse
, openTag
, maybeOpenTag
, notOpenTag
, allOpenTags
, wholeTag
, maybeWholeTag
, allWholeTags
, closeTag
, maybeCloseTag
, notCloseTag
, allCloseTags
, maybeP
, allP
)
where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup
import Data.Maybe
import Data.Char
-- | A type represent the TagOpen, any inner tags , and the TagClose.
type WholeTag =
( Tag , [ Tag ] , Tag )
-- | The Tag parser, using Tag as the token.
type TagParser =
GenParser Tag ( )
-- | Used to invoke parsing of Tags.
tParse :: TagParser a -> [ Tag ] -> a
tParse p ts =
case parse p "tagsoup" ts of
Left err ->
error $ show err
Right crap ->
crap
-- Tag eater is the basic tag matcher, it increments the line number for each tag parsed.
tagEater matcher =
tokenPrim show
( \ oldSp _ _ -> do
setSourceLine oldSp ( 1 + sourceLine oldSp )
)
matcher
-- make a string lowercase
lowercase =
map toLower
-- | openTag matches a TagOpen with the given name. It is not case sensitive.
openTag :: String -> TagParser Tag
openTag soughtName =
openTagMatch soughtName ( Just ) $ \ _ -> Nothing
-- | notOpenTag will match any tag which is not a TagOpen with the given name. It is not case sensitive.
notOpenTag :: String -> TagParser Tag
notOpenTag avoidName =
openTagMatch avoidName ( \ _ -> Nothing ) Just
-- openTagMatch is the higher order function which will receive a TagOpen, and call match if it matches the soughtName, and noMatch if it doesn't
openTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagOpen tname atrs ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
-- closeTagMatch is the higher order function which will receive a TagClose, and call match if it matches the soughtName and noMatch if it doesn't.
closeTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagClose tname ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
-- | wholeTag matches a TagOpen with the given name,
-- then all intervening tags,
-- until it reaches a TagClose with the given name.
-- It is not case sensitive.
wholeTag :: String -> TagParser WholeTag
wholeTag soughtName = do
open <- openTag soughtName
ts <- many $ notCloseTag soughtName
close <- closeTag soughtName
return ( open , ts , close )
-- | closeTag matches a TagClose with the given name. It is not case sensitive.
closeTag :: String -> TagParser Tag
closeTag soughtName =
closeTagMatch soughtName ( Just ) $ \ _ -> Nothing
-- | notCloseTag will match any tag which is not a TagClose with the given name. It is not case sensitive.
notCloseTag :: String -> TagParser Tag
notCloseTag avoidName =
closeTagMatch avoidName ( \ _ -> Nothing ) Just
-- | maybeOpenTag will return `Just` the tag if it gets a TagOpen with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeOpenTag :: String -> TagParser ( Maybe Tag )
maybeOpenTag soughtName =
maybeP $ openTag soughtName
-- | maybeCloseTag will return `Just` the tag if it gets a TagClose with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeCloseTag :: String -> TagParser ( Maybe Tag )
maybeCloseTag soughtName =
maybeP $ closeTag soughtName
-- | maybeWholeTag will return `Just` the tag if it gets a WholeTag with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeWholeTag :: String -> TagParser ( Maybe WholeTag )
maybeWholeTag soughtName =
maybeP $ wholeTag soughtName
-- | allOpenTags will return all TagOpen with the given name.
-- It is not case sensitive.
allOpenTags :: String -> TagParser [ Tag ]
allOpenTags t =
allP $ maybeOpenTag t
-- | allCloseTags will return all TagClose with the given name.
-- It is not case sensitive.
allCloseTags :: String -> TagParser [ Tag ]
allCloseTags t =
allP $ maybeCloseTag t
-- | allWholeTags will return all WholeTag with the given name.
-- It is not case sensitive.
allWholeTags :: String -> TagParser [ WholeTag ]
allWholeTags t =
allP $ maybeWholeTag t
-- | allP takes a parser which returns a `Maybe` value, and returns a list of matching tokens.
allP :: GenParser tok st ( Maybe a ) -> GenParser tok st [ a ]
allP p = do
ts <- many p
let ls =
catMaybes ts
return ls
-- | maybeP takes a parser, and becomes its `Maybe` equivalent -- returning `Just` if it matches, and `Nothing` if it doesn't.
maybeP :: Show tok => GenParser tok st a -> GenParser tok st ( Maybe a )
maybeP p =
try ( do t <- p
return $ Just t
) <|> ( do anyToken
return Nothing
)