-- |
-- Module: Web.WikiCFP.Scraper.Scalpel
-- Description: Scraper implementation with Scalpel
-- Maintainer: Toshio Ito <debug.ito@gmail.com>
--
--
{-# LANGUAGE CPP               #-}
{-# LANGUAGE OverloadedStrings #-}

module Web.WikiCFP.Scraper.Scalpel
    ( ErrorMsg
    , Scraper'
    , confRoot
    , searchRoot
    ) where

import           Control.Applicative      (optional, pure, (*>), (<$>), (<*), (<*>), (<|>))
import           Control.Monad            (forM_, guard, mzero)
import           Data.Attoparsec.Text     (Parser, char, decimal, endOfInput, parseOnly, skipSpace,
                                           string, takeText)
import           Data.List                (sort)
import           Data.Maybe               (catMaybes)
import           Data.Monoid              ((<>))
import           Data.Text                (Text, pack)
import           Data.Time                (Day, fromGregorian)
import           Text.HTML.Scalpel.Core   (Scraper, attr, chroot, chroots, hasClass, text, texts,
                                           (//), (@:), (@=))

import           Web.WikiCFP.Scraper.Type (Event (..), When (..))

type ErrorMsg = String

type Scraper' = Scraper Text

-- | Root scraper for conference Events.
confRoot :: Scraper' (Either ErrorMsg [Event])
confRoot :: Scraper' (Either ErrorMsg [Event])
confRoot = forall (m :: * -> *) e a.
(Functor m, Monad m) =>
m [Either e [a]] -> m (Either e [a])
concatSuccess forall a b. (a -> b) -> a -> b
$ forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m [a]
chroots (TagName
"div" TagName -> [AttributePredicate] -> Selector
@: [ErrorMsg -> AttributePredicate
hasClass ErrorMsg
"contsec"] Selector -> Selector -> Selector
// Selector
"table") forall a b. (a -> b) -> a -> b
$ Scraper' (Either ErrorMsg [Event])
eventsTable

-- | Root scraper for searched Events.
searchRoot :: Scraper' (Either ErrorMsg [Event])
searchRoot :: Scraper' (Either ErrorMsg [Event])
searchRoot = Scraper' (Either ErrorMsg [Event])
confRoot

concatSuccess :: (Functor m, Monad m) => m [Either e [a]] -> m (Either e [a])
concatSuccess :: forall (m :: * -> *) e a.
(Functor m, Monad m) =>
m [Either e [a]] -> m (Either e [a])
concatSuccess = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence

-- | Scrape events from a table. Use with the root at @\<table\>@ tag.
eventsTable :: Scraper' (Either ErrorMsg [Event])
eventsTable :: Scraper' (Either ErrorMsg [Event])
eventsTable = do
  [EventRow]
rows <- forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m [a]
chroots Selector
"tr" (ScraperT Text Identity EventRow
eventRow1 forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ScraperT Text Identity EventRow
eventRow2 forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ScraperT Text Identity EventRow
eventRowHeader)
  case [EventRow]
rows of
    (EventRow
EventRowHeader : [EventRow]
rest) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [EventRow] -> Either ErrorMsg [Event]
rowsToEvents_noHeader [EventRow]
rest
    [EventRow]
_                       -> forall (m :: * -> *) a. MonadPlus m => m a
mzero


-- | Intermediate result of parsing under events \<table\>.
data EventRow
  = EventRowHeader
  | EventRow1 Text Text Text -- ^ shortName, URL and longName
  | EventRow2 Text Text Text -- ^ when, where, deadlines
  deriving (EventRow -> EventRow -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EventRow -> EventRow -> Bool
$c/= :: EventRow -> EventRow -> Bool
== :: EventRow -> EventRow -> Bool
$c== :: EventRow -> EventRow -> Bool
Eq, Eq EventRow
EventRow -> EventRow -> Bool
EventRow -> EventRow -> Ordering
EventRow -> EventRow -> EventRow
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: EventRow -> EventRow -> EventRow
$cmin :: EventRow -> EventRow -> EventRow
max :: EventRow -> EventRow -> EventRow
$cmax :: EventRow -> EventRow -> EventRow
>= :: EventRow -> EventRow -> Bool
$c>= :: EventRow -> EventRow -> Bool
> :: EventRow -> EventRow -> Bool
$c> :: EventRow -> EventRow -> Bool
<= :: EventRow -> EventRow -> Bool
$c<= :: EventRow -> EventRow -> Bool
< :: EventRow -> EventRow -> Bool
$c< :: EventRow -> EventRow -> Bool
compare :: EventRow -> EventRow -> Ordering
$ccompare :: EventRow -> EventRow -> Ordering
Ord, Int -> EventRow -> ShowS
[EventRow] -> ShowS
EventRow -> ErrorMsg
forall a.
(Int -> a -> ShowS) -> (a -> ErrorMsg) -> ([a] -> ShowS) -> Show a
showList :: [EventRow] -> ShowS
$cshowList :: [EventRow] -> ShowS
show :: EventRow -> ErrorMsg
$cshow :: EventRow -> ErrorMsg
showsPrec :: Int -> EventRow -> ShowS
$cshowsPrec :: Int -> EventRow -> ShowS
Show)

-- | Scrape header row. Use with the root at @\<tr\>@ tag.
eventRowHeader :: Scraper' EventRow
eventRowHeader :: ScraperT Text Identity EventRow
eventRowHeader = do
  [Text]
tds <- forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m [str]
texts Selector
"td"
  forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
tds forall a. Eq a => a -> a -> Bool
== Int
4
  -- We cannot use OverloadedStrings with scalpel (as of 0.3.0.1),
  -- because it requires explicit (:: String) declarations everywhere!
  let expected_labels :: [Text]
expected_labels = forall a b. (a -> b) -> [a] -> [b]
map ErrorMsg -> Text
pack [ErrorMsg
"Event", ErrorMsg
"When", ErrorMsg
"Where", ErrorMsg
"Deadline"]
  forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0..(forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
expected_labels forall a. Num a => a -> a -> a
- Int
1)] forall a b. (a -> b) -> a -> b
$ \Int
i -> forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ forall a. Parser a -> Text -> Bool
parsable (Text -> Parser Text
spacedText forall a b. (a -> b) -> a -> b
$ [Text]
expected_labels forall a. [a] -> Int -> a
!! Int
i) forall a b. (a -> b) -> a -> b
$ [Text]
tds forall a. [a] -> Int -> a
!! Int
i
  forall (m :: * -> *) a. Monad m => a -> m a
return EventRow
EventRowHeader

-- | Scrape shortName, URL and longName. Use with the root at @\<tr\>@ tag.
eventRow1 :: Scraper' EventRow
eventRow1 :: ScraperT Text Identity EventRow
eventRow1 = do
  (Text
sname, Text
url) <- forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m a
chroot (TagName
"td" TagName -> [AttributePredicate] -> Selector
@: [AttributeName
"rowspan" AttributeName -> ErrorMsg -> AttributePredicate
@= ErrorMsg
"2"]) forall a b. (a -> b) -> a -> b
$ ((,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text Selector
"a" forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
ErrorMsg -> Selector -> ScraperT str m str
attr ErrorMsg
"href" Selector
"a")
  Text
lname <- forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text (TagName
"td" TagName -> [AttributePredicate] -> Selector
@: [AttributeName
"colspan" AttributeName -> ErrorMsg -> AttributePredicate
@= ErrorMsg
"3"])
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text -> EventRow
EventRow1 Text
sname Text
url Text
lname

-- | Scrape when, where, deadlines in Texts. Use the the root at @\<tr\>@ tag.
eventRow2 :: Scraper' EventRow
eventRow2 :: ScraperT Text Identity EventRow
eventRow2 = do
  [Text]
tds <- forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m [str]
texts Selector
"td"
  forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
tds forall a. Eq a => a -> a -> Bool
== Int
3
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text -> EventRow
EventRow2 ([Text]
tds forall a. [a] -> Int -> a
!! Int
0) ([Text]
tds forall a. [a] -> Int -> a
!! Int
1) ([Text]
tds forall a. [a] -> Int -> a
!! Int
2)

rowsToEvents_noHeader :: [EventRow] -> Either ErrorMsg [Event]
rowsToEvents_noHeader :: [EventRow] -> Either ErrorMsg [Event]
rowsToEvents_noHeader [] = forall (m :: * -> *) a. Monad m => a -> m a
return []
rowsToEvents_noHeader ((EventRow1 Text
sn Text
url Text
ln) : (EventRow2 Text
when Text
wher Text
dl) : [EventRow]
rest) =
  (:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text
-> Text -> Text -> Text -> Text -> Text -> Either ErrorMsg Event
createEvent Text
sn Text
url Text
ln Text
when Text
wher Text
dl forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [EventRow] -> Either ErrorMsg [Event]
rowsToEvents_noHeader [EventRow]
rest
rowsToEvents_noHeader [EventRow]
rows = forall a b. a -> Either a b
Left (ErrorMsg
"Error while parsing rows: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> ErrorMsg
show [EventRow]
rows)

-- | TODO: make it configurable.
urlBase :: Text
urlBase :: Text
urlBase = ErrorMsg -> Text
pack ErrorMsg
"http://wikicfp.com"

createEvent :: Text -> Text -> Text -> Text -> Text -> Text -> Either ErrorMsg Event
createEvent :: Text
-> Text -> Text -> Text -> Text -> Text -> Either ErrorMsg Event
createEvent Text
sname Text
url Text
lname Text
when Text
wher Text
dl = do
  Maybe When
when' <- Text -> Either ErrorMsg (Maybe When)
parseWhen Text
when
  Maybe Text
wher' <- Text -> Either ErrorMsg (Maybe Text)
parseWhere Text
wher
  [Day]
dl' <- Text -> Either ErrorMsg [Day]
parseDeadlines Text
dl
  forall (m :: * -> *) a. Monad m => a -> m a
return Event { eventShortName :: Text
eventShortName = Text
sname,
                 eventURL :: Text
eventURL = Text
urlBase forall a. Semigroup a => a -> a -> a
<> Text
url,
                 eventLongName :: Text
eventLongName = Text
lname,
                 eventWhen :: Maybe When
eventWhen = Maybe When
when',
                 eventWhere :: Maybe Text
eventWhere = Maybe Text
wher',
                 eventDeadlines :: [Day]
eventDeadlines = [Day]
dl'
               }

-- * Parsers

parsable :: Parser a -> Text -> Bool
parsable :: forall a. Parser a -> Text -> Bool
parsable Parser a
p Text
t = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const Bool
False) (forall a b. a -> b -> a
const Bool
True) forall a b. (a -> b) -> a -> b
$ forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly Parser a
p Text
t

spacedText :: Text -> Parser Text
spacedText :: Text -> Parser Text
spacedText Text
expected = Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
string Text
expected forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput

spacedText' :: String -> Parser Text
spacedText' :: ErrorMsg -> Parser Text
spacedText' = Text -> Parser Text
spacedText forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorMsg -> Text
pack

string' :: String -> Parser Text
string' :: ErrorMsg -> Parser Text
string' = Text -> Parser Text
string forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorMsg -> Text
pack

parseWhen :: Text -> Either ErrorMsg (Maybe When)
parseWhen :: Text -> Either ErrorMsg (Maybe When)
parseWhen = forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly (Parser Text (Maybe When)
parserWhen forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) where
  parserWhen :: Parser Text (Maybe When)
parserWhen = (ErrorMsg -> Parser Text
spacedText' ErrorMsg
"N/A" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing)
               forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text When
parserJustWhen)
  parserJustWhen :: Parser Text When
parserJustWhen = Day -> Day -> When
When
                   forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Char -> Parser Char
char Char
'-') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace)
                   forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace)

parseWhere :: Text -> Either ErrorMsg (Maybe Text)
parseWhere :: Text -> Either ErrorMsg (Maybe Text)
parseWhere = forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly (Parser Text (Maybe Text)
parserWhere forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) where
  parserWhere :: Parser Text (Maybe Text)
parserWhere = (ErrorMsg -> Parser Text
spacedText' ErrorMsg
"N/A" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
takeText)

parseDeadlines :: Text -> Either ErrorMsg [Day]
parseDeadlines :: Text -> Either ErrorMsg [Day]
parseDeadlines Text
input = forall a. Ord a => [a] -> [a]
sort forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly (Parser Text [Day]
parserDeadlines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) Text
input where
  parserDeadlines :: Parser Text [Day]
parserDeadlines = Parser Text [Day]
strictParserDeadlines forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
takeText forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return [])
  strictParserDeadlines :: Parser Text [Day]
strictParserDeadlines = do
    Day
primary <- Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
    Maybe Day
msecondary <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'(' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
')')
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Day
primary] (forall a. a -> [a] -> [a]
: [Day
primary]) Maybe Day
msecondary

parserDay :: Parser Day
parserDay :: Parser Text Day
parserDay = Parser Text Day
impl where
  impl :: Parser Text Day
impl = do
    Int
m <- Parser Text Int
parserMonth forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
    Int
d <- forall a. Integral a => Parser a
decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
',') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
    Year
y <- forall a. Integral a => Parser a
decimal
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Year -> Int -> Int -> Day
fromGregorian Year
y Int
m Int
d
  parserMonth :: Parser Text Int
parserMonth =     (ErrorMsg -> Parser Text
string' ErrorMsg
"Jan" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
1)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Feb" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
2)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Mar" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
3)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Apr" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
4)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"May" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
5)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Jun" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
6)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Jul" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
7)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Aug" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
8)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Sep" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
9)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Oct" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
10)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Nov" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
11)
                forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Dec" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
12)