Copyright	(c) Neil Mitchell 2006–2019 (TagSoup), Tony Zorman 2020–2022 (port to html-parse)
License	BSD-3
Maintainer	Tony Zorman <soliditsallgood@mailbox.org>
Stability	experimental
Portability	non-portable
Safe Haskell	Safe-Inferred
Language	Haskell2010

Text.HTML.Parser.Util

Contents

Conversion
Tag identification
Extraction
Utility
Combinators

Description

Utility functions to make working with html-parse as easy as working with TagSoup! Most functions are one-to-one replacements for their respective TagSoup analogues and work the same way.

Synopsis

toToken :: Text -> Token
toTokenDefault :: Token -> Text -> Token
isTagOpen :: Token -> Bool
isTagClose :: Token -> Bool
isTagSelfClose :: Token -> Bool
isContentText :: Token -> Bool
isContentChar :: Token -> Bool
isComment :: Token -> Bool
isDoctype :: Token -> Bool
isTagOpenName :: Text -> Token -> Bool
isTagCloseName :: Text -> Token -> Bool
fromContentText :: Token -> Text
maybeContentText :: Token -> Maybe Text
fromAttrib :: Attr -> Token -> Attr
maybeAttrib :: Attr -> Token -> Maybe Attr
innerText :: [Token] -> Text
toHeadContentText :: [Token] -> Text
between :: Token -> Token -> [Token] -> [Token]
dropHeader :: [Attr] -> [Token] -> [Token]
allContentText :: [Token] -> [Text]
sections :: (a -> Bool) -> [a] -> [[a]]
section :: (a -> Bool) -> [a] -> [a]
partitions :: (a -> Bool) -> [a] -> [[a]]
(~==) :: Token -> Token -> Bool
(~/=) :: Token -> Token -> Bool

Conversion

toToken :: Text -> Token Source #

Like toTokenDefault, but with a built-in default value, so the caller does not have to supply one.

>>> toToken "text"
ContentText "text"

toTokenDefault :: Token -> Text -> Token Source #

Convert Text to Token, with a default in case of a parse failure.

Tag identification

isTagOpen :: Token -> Bool Source #

Test if a Token is a TagOpen.

isTagClose :: Token -> Bool Source #

Test if a Token is a TagClose.

isTagSelfClose :: Token -> Bool Source #

Test if a Token is a TagSelfClose.

isContentText :: Token -> Bool Source #

Test if a Token is a ContentText.

isContentChar :: Token -> Bool Source #

Test if a Token is a ContentChar.

isComment :: Token -> Bool Source #

Test if a Token is a Comment.

isDoctype :: Token -> Bool Source #

Test if a Token is a Doctype.

isTagOpenName :: Text -> Token -> Bool Source #

Returns True if the Token is TagOpen and matches the given name.

isTagCloseName :: Text -> Token -> Bool Source #

Returns True if the Token is TagClose and matches the given name.

Extraction

fromContentText :: Token -> Text Source #

Extract the string from within ContentText; crashes if not a ContentText.

maybeContentText :: Token -> Maybe Text Source #

Extract the string from within ContentText, otherwise return Nothing.

fromAttrib :: Attr -> Token -> Attr Source #

Extract an attribute; crashes if not a TagOpen. Returns Attr "" "" if no attribute present.

Warning: does not distinguish between a missing attribute and a present attribute with value "".

maybeAttrib :: Attr -> Token -> Maybe Attr Source #

Extract an attribute; crashes if not a TagOpen. Returns Nothing if no attribute present.

innerText :: [Token] -> Text Source #

Extract all text content from a list of Tokens (similar to Verbatim found in HaXml).

toHeadContentText :: [Token] -> Text Source #

Get the first ContentText element from a list of Tokens. If no ContentText could be found, return an empty string.

between :: Token -> Token -> [Token] -> [Token] Source #

Get all Tokens between start and end.

dropHeader :: [Attr] -> [Token] -> [Token] Source #

Drop an HTML header (i.e. the header tags and everything in between), as well as everything before it, from a list of Tokens.

allContentText :: [Token] -> [Text] Source #

Get all ContentText entries from a list of Tokens and extract their content.

Utility

sections :: (a -> Bool) -> [a] -> [[a]] Source #

This function takes a list, and returns all suffixes whose first item matches the predicate.

>>> sections (== 'c') "abc cba ccb"
["c cba ccb","cba ccb","ccb","cb"]

section :: (a -> Bool) -> [a] -> [a] Source #

Like sections, but return only the first matching suffix. Returns an empty list if no element matches the predicate.

>>> section (== 'c') "abc cba ccb"
"c cba ccb"

partitions :: (a -> Bool) -> [a] -> [[a]] Source #

This function is similar to sections, but splits the list so no element appears in any two partitions.

>>> partitions (== 'c') "abc cba ccb"
["c ","cba ","c","cb"]

Combinators

(~==) :: Token -> Token -> Bool infixl 9 Source #

Performs an inexact match. The first item should be the Token being tested; the second item is the pattern to match it against.

>>> ContentText "test" ~== ContentText ""
True

>>> TagOpen "div" [Attr "class" "division ", Attr "id" "dd"] ~== TagOpen "div" [Attr "class" "division "]
True

(~/=) :: Token -> Token -> Bool infixl 9 Source #

Negation of (~==).