islink - Check if an HTML element is a link

Safe Haskell: None





isLinkAttr :: String -> String -> Bool Source

`isLinkAttr tag attr` returns `True` if the attribute `attr` of an HTML element with tag name `tag` points to an external resource, and `False` otherwise. For example, `isLinkAttr "a" "href"` returns `True`, whereas `isLinkAttr "a" "class"` returns `False`. Note that `isLinkAttr` expects both `tag` and `attr` to be in lowercase, so `isLinkAttr "A" "HREF"` returns `False`.

allLinkAttrs :: HashSet (String, String) Source

A HashSet that contains all combinations of tag names and attributes that correspond to links.

Example with HXT

Here's an example illustrating how to use isLinkAttr with hxt in order to extract all links from an HTML document:

{-# LANGUAGE Arrows #-}

import Text.Html.IsLink
import Text.XML.HXT.Core

-- | Read the HTML document at @path@ and return one
-- @(tag name, attribute name, attribute value)@ triple for every link
-- attribute that 'getLink' finds in it.
--
-- The document is read with HTML parsing enabled and parser warnings
-- switched off.
getAllLinks :: FilePath -> IO [(String, String, String)]
getAllLinks path = runX $ doc >>> multi getLink
  where
    -- Arrow that loads and parses the input file.
    doc = readDocument [withParseHTML yes, withWarnings no] path

-- | Arrow that emits a @(tag name, attribute name, attribute value)@ triple
-- for each attribute of the input node that 'isLinkAttr' classifies as a
-- link. Attributes that are not links produce no output.
getLink :: ArrowXml a => a XmlTree (String, String, String)
getLink = proc node -> do
    tag <- getName -< node
    -- One result per attribute: the steps below run for each attribute node.
    attrbNode <- getAttrl -< node
    attrb <- getName -< attrbNode
    -- Render the attribute node's children as a String to obtain its value.
    val <- xshow getChildren -< attrbNode
    isLinkA -< (tag, attrb, val)
  where
    -- Pass the triple through unchanged when it names a link attribute;
    -- otherwise the arrow fails and nothing is emitted.
    isLinkA = isLink `guardsP` this
    isLink (tagName, attrName, _) = isLinkAttr tagName attrName