module Network.Shpider.Links ( Link (..) , gatherLinks , allLinks ) where import Data.Maybe import Text.HTML.TagSoup.Parsec import Network.Shpider.TextUtils -- | Parse all links from a list of tags. gatherLinks :: [ Tag ] -> [ Link ] gatherLinks = tParse allLinks -- | The parser responsible for getting all the links. allLinks :: TagParser [ Link ] allLinks = do ls <- allWholeTags "a" return $ toLinks ls toLinks tags = catMaybes $ map toLink tags toLink ( TagOpen _ attrs , innerTags , _ ) = do address <- attrLookup "href" attrs return $ Link { linkAddress = address , linkText = innerText innerTags } -- | Links have an address, corresponding to the href attribute, and some inner tex. data Link = Link { linkAddress :: String , linkText :: String } deriving ( Show , Eq )