module Text.HTML.TagSoup.Parsec
( module Text.HTML.TagSoup
, TagParser
, TagParserSt
, WholeTag
, tParse
, tStParse
, openTag
, maybeOpenTag
, eitherOpenTag
, notOpenTag
, allOpenTags
, wholeTag
, maybeWholeTag
, eitherWholeTag
, allWholeTags
, closeTag
, maybeCloseTag
, eitherCloseTag
, notCloseTag
, allCloseTags
, maybeP
, allP
, eitherP
)
where
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.HTML.TagSoup
import Text.StringLike
import Data.Maybe
import Data.Char
-- | A type represent the TagOpen, any inner tags , and the TagClose.
type WholeTag str =
( Tag str, [ Tag str] , Tag str)
-- | The Tag parser, using Tag as the token.
type TagParser str =
GenParser (Tag str) ( )
-- | A stateful tag parser
-- This is a new type alias to allow backwards compatibility with old code.
type TagParserSt str =
GenParser (Tag str)
-- | Used to invoke parsing of Tags.
tParse :: (StringLike str, Show str) => TagParser str a -> [ Tag str ] -> a
tParse p ts =
either ( error . show ) id $ parse p "tagsoup" ts
-- | Simply run a stateful tag parser
tStParse :: (StringLike str, Show str) => TagParserSt str st a -> st -> [ Tag str ] -> a
tStParse p state tos =
either ( error . show ) id $ runParser p state "tagsoup" tos
-- Tag eater is the basic tag matcher, it increments the line number for each tag parsed.
tagEater matcher =
tokenPrim show
( \ oldSp _ _ -> do
setSourceLine oldSp ( 1 + sourceLine oldSp )
)
matcher
-- make a string lowercase
lowercase :: StringLike s => s -> s
lowercase =
fromString . map toLower . toString
-- | openTag matches a TagOpen with the given name. It is not case sensitive.
openTag :: (StringLike str, Show str) => str -> TagParserSt str st (Tag str)
openTag soughtName =
openTagMatch soughtName ( Just ) $ \ _ -> Nothing
-- | notOpenTag will match any tag which is not a TagOpen with the given name. It is not case sensitive.
notOpenTag :: (StringLike str, Show str) => str -> TagParserSt str st (Tag str)
notOpenTag avoidName =
openTagMatch avoidName ( \ _ -> Nothing ) Just
-- openTagMatch is the higher order function which will receive a TagOpen, and call match if it matches the soughtName, and noMatch if it doesn't
openTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagOpen tname atrs ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
-- closeTagMatch is the higher order function which will receive a TagClose, and call match if it matches the soughtName and noMatch if it doesn't.
closeTagMatch soughtName match noMatch =
tagEater $ \ tag ->
case tag of
t@( TagClose tname ) ->
if lowercase tname == lowercase soughtName
then
match t
else
noMatch t
t ->
noMatch t
-- | wholeTag matches a TagOpen with the given name,
-- then all intervening tags,
-- until it reaches a TagClose with the given name.
-- It is not case sensitive.
wholeTag :: (StringLike str, Show str) => str -> TagParserSt str st (WholeTag str)
wholeTag soughtName = do
open <- openTag soughtName
ts <- many $ notCloseTag soughtName
close <- closeTag soughtName
return ( open , ts , close )
-- | closeTag matches a TagClose with the given name. It is not case sensitive.
closeTag :: (StringLike str, Show str) => str -> TagParserSt str st (Tag str)
closeTag soughtName =
closeTagMatch soughtName ( Just ) $ \ _ -> Nothing
-- | notCloseTag will match any tag which is not a TagClose with the given name. It is not case sensitive.
notCloseTag :: (StringLike str, Show str) => str -> TagParserSt str st (Tag str)
notCloseTag avoidName =
closeTagMatch avoidName ( \ _ -> Nothing ) Just
-- | maybeOpenTag will return `Just` the tag if it gets a TagOpen with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeOpenTag :: (StringLike str, Show str) => str -> TagParserSt str st ( Maybe (Tag str) )
maybeOpenTag =
maybeP . openTag
-- | maybeCloseTag will return `Just` the tag if it gets a TagClose with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeCloseTag :: (StringLike str, Show str) => str -> TagParserSt str st ( Maybe (Tag str) )
maybeCloseTag =
maybeP . closeTag
-- | maybeWholeTag will return `Just` the tag if it gets a WholeTag with he given name,
-- It will return `Nothing` otherwise.
-- It is not case sensitive.
maybeWholeTag :: (StringLike str, Show str) => str -> TagParserSt str st ( Maybe (WholeTag str) )
maybeWholeTag =
maybeP . wholeTag
-- | allOpenTags will return all TagOpen with the given name.
-- It is not case sensitive.
allOpenTags :: (StringLike str, Show str) => str -> TagParserSt str st [ Tag str ]
allOpenTags =
allP . maybeOpenTag
-- | allCloseTags will return all TagClose with the given name.
-- It is not case sensitive.
allCloseTags :: (StringLike str, Show str) => str -> TagParserSt str st [ Tag str ]
allCloseTags =
allP . maybeCloseTag
-- | allWholeTags will return all WholeTag with the given name.
-- It is not case sensitive.
allWholeTags :: (StringLike str, Show str) => str -> TagParserSt str st [ WholeTag str ]
allWholeTags =
allP . maybeWholeTag
-- | eitherP takes a parser, and becomes its `Either` equivalent -- returning `Right` if it matches, and `Left` of anyToken if it doesn't.
eitherP :: Show tok => GenParser tok st a -> GenParser tok st ( Either tok a )
eitherP p = do
try ( do t <- p
return $ Right t
) <|> ( do t <- anyToken
return $ Left t
)
-- | either a Right TagOpen or a Left arbitary tag.
eitherOpenTag :: (StringLike str, Show str) => str -> TagParserSt str st ( Either (Tag str) (Tag str) )
eitherOpenTag =
eitherP . openTag
-- | either a Right TagClose or a Left arbitary tag.
eitherCloseTag :: (StringLike str, Show str) => str -> TagParserSt str st ( Either (Tag str) (Tag str) )
eitherCloseTag =
eitherP . closeTag
-- | either a Right WholeTag or a Left arbitary tag.
eitherWholeTag :: (StringLike str, Show str) => str -> TagParserSt str st ( Either (Tag str) (WholeTag str) )
eitherWholeTag =
eitherP . wholeTag
-- | allP takes a parser which returns a `Maybe` value, and returns a list of matching tokens.
allP :: GenParser tok st ( Maybe a ) -> GenParser tok st [ a ]
allP p = do
ts <- many p
let ls =
catMaybes ts
return ls
-- | maybeP takes a parser, and becomes its `Maybe` equivalent -- returning `Just` if it matches, and `Nothing` if it doesn't.
maybeP :: Show tok => GenParser tok st a -> GenParser tok st ( Maybe a )
maybeP p =
try ( do t <- p
return $ Just t
) <|> ( do anyToken
return Nothing
)