module Text.Feed.Crawl.DetectLink where
import qualified Data.ByteString.Char8 as B
import qualified Data.Char as Char

import Text.XML.HXT.Core

import Text.Feed.Crawl.Common
-- | Parse @input@ as an HTML document and collect every 'Link' that
-- 'scrapePage' produces from it.
--
-- The document is read with HTML parsing enabled and parser warnings
-- switched off.
findFeedLinks :: String -> IO [Link]
findFeedLinks input = runX (parsedDocument >>> scrapePage)
  where
    -- Reader configuration: HTML parsing on, warnings off.
    parserOptions =
      [ withParseHTML yes
      , withWarnings no
      ]
    -- Arrow that turns the raw input string into a document tree.
    parsedDocument = readString parserOptions input
-- | Arrow that yields one 'Link' for every alternate @\<link\>@ element found
-- inside a @\<head\>@ element of the input tree.
--
-- A @\<link\>@ element qualifies when its @rel@ attribute, read as a
-- whitespace-separated, case-insensitive list of keywords, contains
-- @alternate@ and does not contain @stylesheet@ (that combination denotes an
-- alternative style sheet rather than an alternate representation).
--
-- The resulting 'Link' is built from the raw values of the @rel@, @href@,
-- @type@ and @title@ attributes, in that order. 'getAttrValue' always
-- succeeds, giving the empty string for an absent attribute, so no explicit
-- fallback is needed for optional attributes such as @title@.
scrapePage :: ArrowXml a => a XmlTree Link
scrapePage =
  deep (isElem >>> hasName "head")
    >>> deep
      ( isElem
          >>> hasName "link"
          >>> hasAttrValue "rel" isAlternateRel
          >>> toLink
      )
  where
    -- Read the four attributes from the same element and feed them to the
    -- 'Link' constructor; '&&&' nests to the right, which is the tuple shape
    -- 'arr4' expects.
    toLink :: ArrowXml b => b XmlTree Link
    toLink =
      ( getAttrValue "rel"
          &&& getAttrValue "href"
          &&& getAttrValue "type"
          &&& getAttrValue "title"
      )
        >>> arr4 Link

    -- HTML treats @rel@ as a set of space-separated, case-insensitive
    -- keywords, so compare lower-cased tokens instead of the whole string.
    isAlternateRel :: String -> Bool
    isAlternateRel rel =
      "alternate" `elem` keywords && "stylesheet" `notElem` keywords
      where
        keywords = words (map Char.toLower rel)