{-# OPTIONS_HADDOCK hide #-}
module Text.HTML.Scalpel.Internal.Scrape.URL (
    URL
,   scrapeURL
,   scrapeURLWithOpts
) where

import Text.HTML.Scalpel.Internal.Scrape

import Control.Applicative

import qualified Data.ByteString as BS
import qualified Network.Curl as Curl
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.StringLike as TagSoup


-- | A URL, represented as a plain 'String'.
type URL = String

-- | The 'scrapeURL' function downloads the contents of the given URL and
-- executes a 'Scraper' on it.
--
-- This is 'scrapeURLWithOpts' with a single curl option,
-- @'Curl.CurlFollowLocation' True@.
scrapeURL :: TagSoup.StringLike str => URL -> Scraper str a -> IO (Maybe a)
scrapeURL = scrapeURLWithOpts [Curl.CurlFollowLocation True]

-- | The 'scrapeURLWithOpts' function takes a list of curl options, downloads
-- the contents of the given URL and executes a 'Scraper' on it.
--
-- The result is 'Nothing' when the download does not finish with
-- 'Curl.CurlOK', or when running the scraper over the parsed tags yields
-- 'Nothing'.
scrapeURLWithOpts :: TagSoup.StringLike str
                  => [Curl.CurlOption] -> URL -> Scraper str a -> IO (Maybe a)
scrapeURLWithOpts options url scraper = do
    maybeBytes <- openURIWithOpts url options
    -- The downloaded bytes are cast to the scraper's string type, parsed into
    -- tags and handed to the scraper; a failed download short-circuits via the
    -- 'Maybe' monad.
    return $ maybeBytes >>= scrape scraper . toTags
    where
        toTags = TagSoup.parseTags . TagSoup.castString

-- | Fetch the body of the given URL using the supplied curl options.
--
-- Returns 'Nothing' unless the response's curl code is 'Curl.CurlOK'.
openURIWithOpts :: URL -> [Curl.CurlOption] -> IO (Maybe BS.ByteString)
openURIWithOpts url opts = do
    resp <- curlGetResponse_ url opts
    return $ case Curl.respCurlCode resp of
        Curl.CurlOK -> Just (Curl.respBody resp)
        _           -> Nothing

-- | 'Curl.curlGetResponse_' with its result type fixed: headers as a list of
-- 'String' pairs and the body as a strict 'BS.ByteString'.
curlGetResponse_ :: URL
                 -> [Curl.CurlOption]
                 -> IO (Curl.CurlResponse_ [(String, String)] BS.ByteString)
curlGetResponse_ = Curl.curlGetResponse_