{-# LANGUAGE TypeSynonymInstances, FlexibleInstances #-}
-- |
-- Module: Web.WikiCFP.Scraper
-- Description: Scrape WikiCFP web site
-- Maintainer: Toshio Ito <debug.ito@gmail.com>
-- 
-- Synopsis:
--
-- > import qualified Network.HTTP as H
-- > import Web.WikiCFP.Scraper (scrapeSearchEvents)
-- > 
-- > main :: IO ()
-- > main =  do
-- >   res <- H.getResponseBody =<< H.simpleHTTP (H.getRequest "http://wikicfp.com/cfp/servlet/tool.search?q=japan&year=t")
-- >   print $ scrapeSearchEvents res
--
-- This module scrapes WikiCFP pages (<http://wikicfp.com/>) for
-- call-for-papers. It helps you stay up to date with deadlines of
-- academic paper submissions.
module Web.WikiCFP.Scraper
       ( -- * Scraper routines
         scrapeConfEvents,
         scrapeSearchEvents,
         -- * Types
         ErrorMsg,
         HTML(..),
         When(..),
         Event(..)
       ) where

import qualified Data.ByteString as SB
import qualified Data.ByteString.Lazy as LB
import Data.Text (Text, pack)
import Data.Text.Encoding (decodeUtf8')
import qualified Data.Text.Lazy as LT
import Text.HTML.Scalpel.Core (scrapeStringLike)

import Web.WikiCFP.Scraper.Type (When(..), Event(..))
import Web.WikiCFP.Scraper.Scalpel (ErrorMsg, Scraper', confRoot, searchRoot)


-- | Types of input HTML data to scrape.
class HTML a where
  decodeToText :: a -> Either ErrorMsg Text

instance HTML Text where
  decodeToText :: Text -> Either ErrorMsg Text
decodeToText = Text -> Either ErrorMsg Text
forall a b. b -> Either a b
Right

instance HTML LT.Text where
  decodeToText :: Text -> Either ErrorMsg Text
decodeToText = Text -> Either ErrorMsg Text
forall a b. b -> Either a b
Right (Text -> Either ErrorMsg Text)
-> (Text -> Text) -> Text -> Either ErrorMsg Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
LT.toStrict

-- | It just assumes UTF-8 encoding.
instance HTML SB.ByteString where
  decodeToText :: ByteString -> Either ErrorMsg Text
decodeToText = (UnicodeException -> Either ErrorMsg Text)
-> (Text -> Either ErrorMsg Text)
-> Either UnicodeException Text
-> Either ErrorMsg Text
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\UnicodeException
e -> ErrorMsg -> Either ErrorMsg Text
forall a b. a -> Either a b
Left (ErrorMsg -> Either ErrorMsg Text)
-> ErrorMsg -> Either ErrorMsg Text
forall a b. (a -> b) -> a -> b
$ ErrorMsg
"UTF-8 decoding error: " ErrorMsg -> ErrorMsg -> ErrorMsg
forall a. [a] -> [a] -> [a]
++ UnicodeException -> ErrorMsg
forall a. Show a => a -> ErrorMsg
show UnicodeException
e) Text -> Either ErrorMsg Text
forall a b. b -> Either a b
Right (Either UnicodeException Text -> Either ErrorMsg Text)
-> (ByteString -> Either UnicodeException Text)
-> ByteString
-> Either ErrorMsg Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either UnicodeException Text
decodeUtf8'

instance HTML LB.ByteString where
  decodeToText :: ByteString -> Either ErrorMsg Text
decodeToText = ByteString -> Either ErrorMsg Text
forall a. HTML a => a -> Either ErrorMsg Text
decodeToText (ByteString -> Either ErrorMsg Text)
-> (ByteString -> ByteString) -> ByteString -> Either ErrorMsg Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
LB.toStrict

instance HTML String where
  decodeToText :: ErrorMsg -> Either ErrorMsg Text
decodeToText = Text -> Either ErrorMsg Text
forall a b. b -> Either a b
Right (Text -> Either ErrorMsg Text)
-> (ErrorMsg -> Text) -> ErrorMsg -> Either ErrorMsg Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorMsg -> Text
pack

runScraper :: Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper :: Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper Scraper' (Either ErrorMsg a)
s Text
input = Either ErrorMsg a
-> (Either ErrorMsg a -> Either ErrorMsg a)
-> Maybe (Either ErrorMsg a)
-> Either ErrorMsg a
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ErrorMsg -> Either ErrorMsg a
forall a b. a -> Either a b
Left ErrorMsg
"Scraping error") Either ErrorMsg a -> Either ErrorMsg a
forall a. a -> a
id (Maybe (Either ErrorMsg a) -> Either ErrorMsg a)
-> Maybe (Either ErrorMsg a) -> Either ErrorMsg a
forall a b. (a -> b) -> a -> b
$ Text -> Scraper' (Either ErrorMsg a) -> Maybe (Either ErrorMsg a)
forall str a. StringLike str => str -> Scraper str a -> Maybe a
scrapeStringLike Text
input Scraper' (Either ErrorMsg a)
s

-- | Scrape a page of a conference, for example,
-- <http://wikicfp.com/cfp/program?id=2671>
scrapeConfEvents :: HTML input => input -> Either ErrorMsg [Event]
scrapeConfEvents :: input -> Either ErrorMsg [Event]
scrapeConfEvents input
t = Scraper' (Either ErrorMsg [Event])
-> Text -> Either ErrorMsg [Event]
forall a. Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper Scraper' (Either ErrorMsg [Event])
confRoot (Text -> Either ErrorMsg [Event])
-> Either ErrorMsg Text -> Either ErrorMsg [Event]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< input -> Either ErrorMsg Text
forall a. HTML a => a -> Either ErrorMsg Text
decodeToText input
t

-- | Scrape a page of search results, for example,
-- <http://wikicfp.com/cfp/servlet/tool.search?q=cloud&year=t>
scrapeSearchEvents :: HTML input => input -> Either ErrorMsg [Event]
scrapeSearchEvents :: input -> Either ErrorMsg [Event]
scrapeSearchEvents input
t = Scraper' (Either ErrorMsg [Event])
-> Text -> Either ErrorMsg [Event]
forall a. Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper Scraper' (Either ErrorMsg [Event])
searchRoot (Text -> Either ErrorMsg [Event])
-> Either ErrorMsg Text -> Either ErrorMsg [Event]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< input -> Either ErrorMsg Text
forall a. HTML a => a -> Either ErrorMsg Text
decodeToText input
t