{-# LANGUAGE FlexibleInstances    #-}
{-# LANGUAGE TypeSynonymInstances #-}
-- |
-- Module: Web.WikiCFP.Scraper
-- Description: Scrape WikiCFP web site
-- Maintainer: Toshio Ito <debug.ito@gmail.com>
--
-- Synopsis:
--
-- > import qualified Network.HTTP as H
-- > import Web.WikiCFP.Scraper (scrapeSearchEvents)
-- >
-- > main :: IO ()
-- > main =  do
-- >   res <- H.getResponseBody =<< H.simpleHTTP (H.getRequest "http://wikicfp.com/cfp/servlet/tool.search?q=japan&year=t")
-- >   print $ scrapeSearchEvents res
--
-- This module scrapes WikiCFP pages (<http://wikicfp.com/>) for
-- call-for-papers. It helps you stay up to date with deadlines of
-- academic paper submissions.
module Web.WikiCFP.Scraper
    ( -- * Scraper routines
      scrapeConfEvents
    , scrapeSearchEvents
      -- * Types
    , ErrorMsg
    , HTML (..)
    , When (..)
    , Event (..)
    ) where

import qualified Data.ByteString             as SB
import qualified Data.ByteString.Lazy        as LB
import           Data.Text                   (Text, pack)
import           Data.Text.Encoding          (decodeUtf8')
import qualified Data.Text.Lazy              as LT
import           Text.HTML.Scalpel.Core      (scrapeStringLike)

import           Web.WikiCFP.Scraper.Scalpel (ErrorMsg, Scraper', confRoot, searchRoot)
import           Web.WikiCFP.Scraper.Type    (Event (..), When (..))


-- | Types of input HTML data to scrape.
class HTML a where
  decodeToText :: a -> Either ErrorMsg Text

instance HTML Text where
  decodeToText :: Text -> Either ErrorMsg Text
decodeToText = forall a b. b -> Either a b
Right

instance HTML LT.Text where
  decodeToText :: Text -> Either ErrorMsg Text
decodeToText = forall a b. b -> Either a b
Right forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
LT.toStrict

-- | It just assumes UTF-8 encoding.
instance HTML SB.ByteString where
  decodeToText :: ByteString -> Either ErrorMsg Text
decodeToText = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\UnicodeException
e -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ ErrorMsg
"UTF-8 decoding error: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> ErrorMsg
show UnicodeException
e) forall a b. b -> Either a b
Right forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Either UnicodeException Text
decodeUtf8'

instance HTML LB.ByteString where
  decodeToText :: ByteString -> Either ErrorMsg Text
decodeToText = forall a. HTML a => a -> Either ErrorMsg Text
decodeToText forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
LB.toStrict

instance HTML String where
  decodeToText :: ErrorMsg -> Either ErrorMsg Text
decodeToText = forall a b. b -> Either a b
Right forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorMsg -> Text
pack

runScraper :: Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper :: forall a. Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper Scraper' (Either ErrorMsg a)
s Text
input = forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left ErrorMsg
"Scraping error") forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall str a. StringLike str => str -> Scraper str a -> Maybe a
scrapeStringLike Text
input Scraper' (Either ErrorMsg a)
s

-- | Scrape a page of a conference, for example,
-- <http://wikicfp.com/cfp/program?id=2671>
scrapeConfEvents :: HTML input => input -> Either ErrorMsg [Event]
scrapeConfEvents :: forall input. HTML input => input -> Either ErrorMsg [Event]
scrapeConfEvents input
t = forall a. Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper Scraper' (Either ErrorMsg [Event])
confRoot forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall a. HTML a => a -> Either ErrorMsg Text
decodeToText input
t

-- | Scrape a page of search results, for example,
-- <http://wikicfp.com/cfp/servlet/tool.search?q=cloud&year=t>
scrapeSearchEvents :: HTML input => input -> Either ErrorMsg [Event]
scrapeSearchEvents :: forall input. HTML input => input -> Either ErrorMsg [Event]
scrapeSearchEvents input
t = forall a. Scraper' (Either ErrorMsg a) -> Text -> Either ErrorMsg a
runScraper Scraper' (Either ErrorMsg [Event])
searchRoot forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall a. HTML a => a -> Either ErrorMsg Text
decodeToText input
t