module Text.HTML.TagSoup.Parsec ( module Text.HTML.TagSoup , TagParser , WholeTag , tParse , openTag , maybeOpenTag , notOpenTag , allOpenTags , wholeTag , maybeWholeTag , allWholeTags , closeTag , maybeCloseTag , notCloseTag , allCloseTags , maybeP , allP ) where import Text.ParserCombinators.Parsec import Text.ParserCombinators.Parsec.Pos import Text.HTML.TagSoup import Data.Maybe import Data.Char -- | A type represent the TagOpen, any inner tags , and the TagClose. type WholeTag = ( Tag , [ Tag ] , Tag ) -- | The Tag parser, using Tag as the token. type TagParser = GenParser Tag ( ) -- | Used to invoke parsing of Tags. tParse :: TagParser a -> [ Tag ] -> a tParse p ts = case parse p "tagsoup" ts of Left err -> error $ show err Right crap -> crap -- Tag eater is the basic tag matcher, it increments the line number for each tag parsed. tagEater matcher = tokenPrim show ( \ oldSp _ _ -> do setSourceLine oldSp ( 1 + sourceLine oldSp ) ) matcher -- make a string lowercase lowercase = map toLower -- | openTag matches a TagOpen with the given name. It is not case sensitive. openTag :: String -> TagParser Tag openTag soughtName = openTagMatch soughtName ( Just ) $ \ _ -> Nothing -- | notOpenTag will match any tag which is not a TagOpen with the given name. It is not case sensitive. notOpenTag :: String -> TagParser Tag notOpenTag avoidName = openTagMatch avoidName ( \ _ -> Nothing ) Just -- openTagMatch is the higher order function which will receive a TagOpen, and call match if it matches the soughtName, and noMatch if it doesn't openTagMatch soughtName match noMatch = tagEater $ \ tag -> case tag of t@( TagOpen tname atrs ) -> if lowercase tname == lowercase soughtName then match t else noMatch t t -> noMatch t -- closeTagMatch is the higher order function which will receive a TagClose, and call match if it matches the soughtName and noMatch if it doesn't. closeTagMatch soughtName match noMatch = tagEater $ \ tag -> case tag of t@( TagClose tname ) -> if lowercase tname == lowercase soughtName then match t else noMatch t t -> noMatch t -- | wholeTag matches a TagOpen with the given name, -- then all intervening tags, -- until it reaches a TagClose with the given name. -- It is not case sensitive. wholeTag :: String -> TagParser WholeTag wholeTag soughtName = do open <- openTag soughtName ts <- many $ notCloseTag soughtName close <- closeTag soughtName return ( open , ts , close ) -- | closeTag matches a TagClose with the given name. It is not case sensitive. closeTag :: String -> TagParser Tag closeTag soughtName = closeTagMatch soughtName ( Just ) $ \ _ -> Nothing -- | notCloseTag will match any tag which is not a TagClose with the given name. It is not case sensitive. notCloseTag :: String -> TagParser Tag notCloseTag avoidName = closeTagMatch avoidName ( \ _ -> Nothing ) Just -- | maybeOpenTag will return `Just` the tag if it gets a TagOpen with he given name, -- It will return `Nothing` otherwise. -- It is not case sensitive. maybeOpenTag :: String -> TagParser ( Maybe Tag ) maybeOpenTag soughtName = maybeP $ openTag soughtName -- | maybeCloseTag will return `Just` the tag if it gets a TagClose with he given name, -- It will return `Nothing` otherwise. -- It is not case sensitive. maybeCloseTag :: String -> TagParser ( Maybe Tag ) maybeCloseTag soughtName = maybeP $ closeTag soughtName -- | maybeWholeTag will return `Just` the tag if it gets a WholeTag with he given name, -- It will return `Nothing` otherwise. -- It is not case sensitive. maybeWholeTag :: String -> TagParser ( Maybe WholeTag ) maybeWholeTag soughtName = maybeP $ wholeTag soughtName -- | allOpenTags will return all TagOpen with the given name. -- It is not case sensitive. allOpenTags :: String -> TagParser [ Tag ] allOpenTags t = allP $ maybeOpenTag t -- | allCloseTags will return all TagClose with the given name. -- It is not case sensitive. allCloseTags :: String -> TagParser [ Tag ] allCloseTags t = allP $ maybeCloseTag t -- | allWholeTags will return all WholeTag with the given name. -- It is not case sensitive. allWholeTags :: String -> TagParser [ WholeTag ] allWholeTags t = allP $ maybeWholeTag t -- | allP takes a parser which returns a `Maybe` value, and returns a list of matching tokens. allP :: GenParser tok st ( Maybe a ) -> GenParser tok st [ a ] allP p = do ts <- many p let ls = catMaybes ts return ls -- | maybeP takes a parser, and becomes its `Maybe` equivalent -- returning `Just` if it matches, and `Nothing` if it doesn't. maybeP :: Show tok => GenParser tok st a -> GenParser tok st ( Maybe a ) maybeP p = try ( do t <- p return $ Just t ) <|> ( do anyToken return Nothing )