{-
 -
 - Copyright (c) 2009-2010 Johnny Morrice
 -
 - Permission is hereby granted, free of charge, to any person
 - obtaining a copy of this software and associated documentation 
 - files (the "Software"), to deal in the Software without 
 - restriction, including without limitation the rights to use, copy, 
 - modify, merge, publish, distribute, sublicense, and/or sell copies 
 - of the Software, and to permit persons to whom the Software is 
 - furnished to do so, subject to the following conditions:
 -
 - The above copyright notice and this permission notice shall be 
 - included in all copies or substantial portions of the Software.
 -
 - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 - SOFTWARE.
 -
-}

module Network.Shpider.Links 
   ( Link (..)
   , gatherLinks
   , allLinks
   )
   where

import Data.Maybe

import Text.HTML.TagSoup.Parsec
import Network.Shpider.TextUtils

-- | Run the 'allLinks' parser over a list of tags and return every
-- 'Link' it produces.
gatherLinks :: [ Tag String ] -> [ Link ]
gatherLinks tags =
   tParse allLinks tags

-- | Parser that collects every whole @a@ element via 'allWholeTags' and
-- converts the results into 'Link's with 'toLinks'.
allLinks :: TagParser String [ Link ]
allLinks =
   fmap toLinks ( allWholeTags "a" )

-- | Convert a list of whole elements (opening tag, inner tags, closing tag)
-- into 'Link's.  Elements for which 'toLink' yields 'Nothing' are dropped;
-- the order of the remaining links follows the input order.
toLinks :: [ ( Tag String , [ Tag String ] , Tag String ) ] -> [ Link ]
toLinks =
   mapMaybe toLink

-- | Build a 'Link' from one whole element (opening tag, inner tags,
-- closing tag).
--
-- The address is whatever 'attrLookup' finds for @\"href\"@ among the
-- opening tag's attributes; the text is the 'innerText' of the inner tags.
-- Returns 'Nothing' when the lookup fails, or when the first component is
-- not a 'TagOpen' (previously that case was a pattern-match failure at
-- runtime).
toLink :: ( Tag String , [ Tag String ] , Tag String ) -> Maybe Link
toLink ( TagOpen _ attrs , innerTags , _ ) = do
   address <- attrLookup "href" attrs
   return $ Link { linkAddress = address
                 , linkText = innerText innerTags
                 }
-- Anything that does not start with an opening tag cannot carry an href.
toLink _ =
   Nothing
-- | Links have an address, corresponding to the href attribute, and some inner text.
data Link =
   Link { linkAddress :: String
          -- ^ Value found for the @href@ attribute of the opening tag.
        , linkText :: String
          -- ^ Inner text of the tags enclosed by the link element.
        }
   deriving ( Show , Eq )