-- | Extraction of hyperlinks from parsed HTML tags.
--
-- Exports the 'Link' record together with 'gatherLinks', which works on a
-- plain list of tags, and 'allLinks', the underlying tag parser.
module Network.Shpider.Links
( Link (..)
, gatherLinks
, allLinks
)
where
import Data.Maybe
import Text.HTML.TagSoup.Parsec
import Network.Shpider.TextUtils
-- | Run the 'allLinks' parser over a list of tags and return every 'Link' it
-- produces.
gatherLinks :: [ Tag String ] -> [ Link ]
gatherLinks tags =
   tParse allLinks tags
-- | Tag parser that collects every whole @a@ tag via 'allWholeTags' and turns
-- the result into 'Link's with 'toLinks'.
allLinks :: TagParser String [ Link ]
allLinks =
   fmap toLinks ( allWholeTags "a" )
-- | Convert a list of whole-tag triples (opening tag, inner tags, closing tag)
-- into 'Link's, dropping every triple for which 'toLink' yields 'Nothing'.
toLinks :: [ ( Tag String , [ Tag String ] , Tag String ) ] -> [ Link ]
toLinks =
   mapMaybe toLink
-- | Build a 'Link' from one whole-tag triple (opening tag, inner tags, closing
-- tag).
--
-- The address is whatever 'attrLookup' finds for @href@ among the opening
-- tag's attributes; the text is 'innerText' of the inner tags. The closing
-- tag is ignored.
--
-- Yields 'Nothing' when 'attrLookup' finds no @href@, and also when the first
-- component is not a 'TagOpen' (that case used to be a runtime pattern-match
-- failure).
toLink :: ( Tag String , [ Tag String ] , Tag String ) -> Maybe Link
toLink ( TagOpen _ attrs , innerTags , _ ) =
   fmap mkLink ( attrLookup "href" attrs )
   where
      mkLink address =
         Link { linkAddress = address
              , linkText = innerText innerTags
              }
-- Any other tag constructor in first position cannot describe a link.
toLink _ =
   Nothing
-- | A hyperlink found in a document: where it points and the text it shows.
data Link = Link
   { linkAddress :: String
     -- ^ Target of the link; 'gatherLinks' fills this from the @href@
     -- attribute of the anchor's opening tag.
   , linkText :: String
     -- ^ Text of the link; 'gatherLinks' fills this with the 'innerText' of
     -- the tags inside the anchor.
   }
   deriving ( Eq , Show )