Copyright | (c) Neil Mitchell 2006–2019 (TagSoup), Tony Zorman 2020–2022 (port to html-parse) |
---|---|
License | BSD-3 |
Maintainer | Tony Zorman <soliditsallgood@mailbox.org> |
Stability | experimental |
Portability | non-portable |
Safe Haskell | Safe-Inferred |
Language | Haskell2010 |
Utility functions to make working with html-parse
as easy as working
with TagSoup! Most functions are one-to-one replacements for their
respective TagSoup analogues and work the same way.
Synopsis
- toToken :: Text -> Token
- toTokenDefault :: Token -> Text -> Token
- isTagOpen :: Token -> Bool
- isTagClose :: Token -> Bool
- isTagSelfClose :: Token -> Bool
- isContentText :: Token -> Bool
- isContentChar :: Token -> Bool
- isComment :: Token -> Bool
- isDoctype :: Token -> Bool
- isTagOpenName :: Text -> Token -> Bool
- isTagCloseName :: Text -> Token -> Bool
- fromContentText :: Token -> Text
- maybeContentText :: Token -> Maybe Text
- fromAttrib :: Attr -> Token -> Attr
- maybeAttrib :: Attr -> Token -> Maybe Attr
- innerText :: [Token] -> Text
- toHeadContentText :: [Token] -> Text
- between :: Token -> Token -> [Token] -> [Token]
- dropHeader :: [Attr] -> [Token] -> [Token]
- allContentText :: [Token] -> [Text]
- sections :: (a -> Bool) -> [a] -> [[a]]
- section :: (a -> Bool) -> [a] -> [a]
- partitions :: (a -> Bool) -> [a] -> [[a]]
- (~==) :: Token -> Token -> Bool
- (~/=) :: Token -> Token -> Bool
Conversion
toToken :: Text -> Token Source #
Like toTokenDefault, but with the default value already supplied.
>>>
toToken "text"
ContentText "text"
Tag identification
isTagSelfClose :: Token -> Bool Source #
Test if a Token
is a TagSelfClose
.
isContentText :: Token -> Bool Source #
Test if a Token
is a ContentText
.
isContentChar :: Token -> Bool Source #
Test if a Token
is a ContentChar
.
Extraction
fromContentText :: Token -> Text Source #
Extract the string from within a ContentText; crashes if the Token is not a
ContentText.
maybeContentText :: Token -> Maybe Text Source #
Extract the string from within ContentText
, otherwise return Nothing
.
fromAttrib :: Attr -> Token -> Attr Source #
Extract an attribute; crashes if not a TagOpen
. Returns Attr "" ""
if no attribute present.
Warning: does not distinguish between a missing attribute and a present
attribute with the value "".
innerText :: [Token] -> Text Source #
Extract all text content from a list of Tokens (similar to Verbatim found in HaXml).
toHeadContentText :: [Token] -> Text Source #
Get the first ContentText element from a list of Tokens. If no such
element could be found, return an empty string.
dropHeader :: [Attr] -> [Token] -> [Token] Source #
Drop an HTML header (i.e. the header tags and everything in between), as
well as everything before it, from a list of Token
s.
allContentText :: [Token] -> [Text] Source #
Get all ContentText
entries from a list of Token
s and extract their
content.
Utility
sections :: (a -> Bool) -> [a] -> [[a]] Source #
This function takes a list, and returns all suffixes whose first item matches the predicate.
>>>
sections (== 'c') "abc cba ccb"
["c cba ccb","cba ccb","ccb","cb"]
section :: (a -> Bool) -> [a] -> [a] Source #
Like sections, but return only the first section. Returns an empty list if
no element matches the predicate.
>>>
section (== 'c') "abc cba ccb"
"c cba ccb"
partitions :: (a -> Bool) -> [a] -> [[a]] Source #
This function is similar to sections, but splits the list so that no element
appears in more than one partition.
>>>
partitions (== 'c') "abc cba ccb"
["c ","cba ","c","cb"]