{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE TypeSynonymInstances #-}
-- |
-- Module: Web.WikiCFP.Scraper
-- Description: Scrape WikiCFP web site
-- Maintainer: Toshio Ito
--
-- Synopsis:
--
-- > import qualified Network.HTTP as H
-- > import Web.WikiCFP.Scraper (scrapeSearchEvents)
-- >
-- > main :: IO ()
-- > main = do
-- >   res <- H.getResponseBody =<< H.simpleHTTP (H.getRequest "http://wikicfp.com/cfp/servlet/tool.search?q=japan&year=t")
-- >   print $ scrapeSearchEvents res
--
-- This module scrapes WikiCFP pages (<http://wikicfp.com/>) for
-- call-for-papers. It helps you stay up to date with deadlines of
-- academic paper submissions.
module Web.WikiCFP.Scraper
       ( -- * Scraper routines
         scrapeConfEvents,
         scrapeSearchEvents,
         -- * Types
         ErrorMsg,
         HTML(..),
         When(..),
         Event(..)
       ) where

import Data.Maybe (fromMaybe)

import qualified Data.ByteString as SB
import qualified Data.ByteString.Lazy as LB
import Data.Text (Text, pack)
import Data.Text.Encoding (decodeUtf8')
import qualified Data.Text.Lazy as LT
import Text.HTML.Scalpel.Core (scrapeStringLike)

import Web.WikiCFP.Scraper.Type (When(..), Event(..))
import Web.WikiCFP.Scraper.Scalpel (ErrorMsg, Scraper', confRoot, searchRoot)

-- | Types of input HTML data to scrape.
--
-- Every instance converts its input to strict 'Text', which is the
-- representation the scrapers in this module run on.
class HTML a where
  -- | Convert the input to strict 'Text', or report why that failed.
  decodeToText :: a -> Either ErrorMsg Text

-- | Strict 'Text' is used as it is.
instance HTML Text where
  decodeToText = Right

-- | Lazy 'LT.Text' is converted to strict 'Text'.
instance HTML LT.Text where
  decodeToText = Right . LT.toStrict

-- | It just assumes UTF-8 encoding. Input that is not valid UTF-8
-- results in 'Left' with a message describing the decoding error.
instance HTML SB.ByteString where
  decodeToText = either (Left . describe) Right . decodeUtf8'
    where
      describe e = "UTF-8 decoding error: " ++ show e

-- | Converted to a strict 'SB.ByteString' first, so it is decoded as
-- UTF-8, too.
instance HTML LB.ByteString where
  decodeToText = decodeToText . LB.toStrict

-- | 'String' is packed into 'Text'.
instance HTML String where
  decodeToText = Right . pack

-- | Run a scraper on the given text. When 'scrapeStringLike' yields
-- no result at all, that is reported as 'Left' @\"Scraping error\"@;
-- otherwise the scraper's own 'Either' result is returned unchanged.
runScraper :: Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper s input = fromMaybe (Left "Scraping error") $ scrapeStringLike input s

-- | Scrape a page of a conference.
--
-- The result is 'Left' when the input cannot be decoded to 'Text' or
-- when scraping fails.
scrapeConfEvents :: HTML input => input -> Either ErrorMsg [Event]
scrapeConfEvents t = runScraper confRoot =<< decodeToText t

-- | Scrape a page of search results, for example,
-- <http://wikicfp.com/cfp/servlet/tool.search?q=japan&year=t>
--
-- The result is 'Left' when the input cannot be decoded to 'Text' or
-- when scraping fails.
scrapeSearchEvents :: HTML input => input -> Either ErrorMsg [Event]
scrapeSearchEvents t = runScraper searchRoot =<< decodeToText t