{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Web.WikiCFP.Scraper.Scalpel
( ErrorMsg
, Scraper'
, confRoot
, searchRoot
) where
import Control.Applicative (optional, pure, (*>), (<$>), (<*), (<*>), (<|>))
import Control.Monad (forM_, guard, mzero)
import Data.Attoparsec.Text (Parser, char, decimal, endOfInput, parseOnly, skipSpace,
string, takeText)
import Data.List (sort)
import Data.Maybe (catMaybes)
import Data.Monoid ((<>))
import Data.Text (Text, pack)
import Data.Time (Day, fromGregorian)
import Text.HTML.Scalpel.Core (Scraper, attr, chroot, chroots, hasClass, text, texts,
(//), (@:), (@=))
import Web.WikiCFP.Scraper.Type (Event (..), When (..))
type ErrorMsg = String
type Scraper' = Scraper Text
confRoot :: Scraper' (Either ErrorMsg [Event])
confRoot :: Scraper' (Either ErrorMsg [Event])
confRoot = forall (m :: * -> *) e a.
(Functor m, Monad m) =>
m [Either e [a]] -> m (Either e [a])
concatSuccess forall a b. (a -> b) -> a -> b
$ forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m [a]
chroots (TagName
"div" TagName -> [AttributePredicate] -> Selector
@: [ErrorMsg -> AttributePredicate
hasClass ErrorMsg
"contsec"] Selector -> Selector -> Selector
// Selector
"table") forall a b. (a -> b) -> a -> b
$ Scraper' (Either ErrorMsg [Event])
eventsTable
searchRoot :: Scraper' (Either ErrorMsg [Event])
searchRoot :: Scraper' (Either ErrorMsg [Event])
searchRoot = Scraper' (Either ErrorMsg [Event])
confRoot
concatSuccess :: (Functor m, Monad m) => m [Either e [a]] -> m (Either e [a])
concatSuccess :: forall (m :: * -> *) e a.
(Functor m, Monad m) =>
m [Either e [a]] -> m (Either e [a])
concatSuccess = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence
eventsTable :: Scraper' (Either ErrorMsg [Event])
eventsTable :: Scraper' (Either ErrorMsg [Event])
eventsTable = do
[EventRow]
rows <- forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m [a]
chroots Selector
"tr" (ScraperT Text Identity EventRow
eventRow1 forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ScraperT Text Identity EventRow
eventRow2 forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ScraperT Text Identity EventRow
eventRowHeader)
case [EventRow]
rows of
(EventRow
EventRowHeader : [EventRow]
rest) -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [EventRow] -> Either ErrorMsg [Event]
rowsToEvents_noHeader [EventRow]
rest
[EventRow]
_ -> forall (m :: * -> *) a. MonadPlus m => m a
mzero
data EventRow
=
| EventRow1 Text Text Text
| EventRow2 Text Text Text
deriving (EventRow -> EventRow -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: EventRow -> EventRow -> Bool
$c/= :: EventRow -> EventRow -> Bool
== :: EventRow -> EventRow -> Bool
$c== :: EventRow -> EventRow -> Bool
Eq, Eq EventRow
EventRow -> EventRow -> Bool
EventRow -> EventRow -> Ordering
EventRow -> EventRow -> EventRow
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: EventRow -> EventRow -> EventRow
$cmin :: EventRow -> EventRow -> EventRow
max :: EventRow -> EventRow -> EventRow
$cmax :: EventRow -> EventRow -> EventRow
>= :: EventRow -> EventRow -> Bool
$c>= :: EventRow -> EventRow -> Bool
> :: EventRow -> EventRow -> Bool
$c> :: EventRow -> EventRow -> Bool
<= :: EventRow -> EventRow -> Bool
$c<= :: EventRow -> EventRow -> Bool
< :: EventRow -> EventRow -> Bool
$c< :: EventRow -> EventRow -> Bool
compare :: EventRow -> EventRow -> Ordering
$ccompare :: EventRow -> EventRow -> Ordering
Ord, Int -> EventRow -> ShowS
[EventRow] -> ShowS
EventRow -> ErrorMsg
forall a.
(Int -> a -> ShowS) -> (a -> ErrorMsg) -> ([a] -> ShowS) -> Show a
showList :: [EventRow] -> ShowS
$cshowList :: [EventRow] -> ShowS
show :: EventRow -> ErrorMsg
$cshow :: EventRow -> ErrorMsg
showsPrec :: Int -> EventRow -> ShowS
$cshowsPrec :: Int -> EventRow -> ShowS
Show)
eventRowHeader :: Scraper' EventRow
= do
[Text]
tds <- forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m [str]
texts Selector
"td"
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
tds forall a. Eq a => a -> a -> Bool
== Int
4
let expected_labels :: [Text]
expected_labels = forall a b. (a -> b) -> [a] -> [b]
map ErrorMsg -> Text
pack [ErrorMsg
"Event", ErrorMsg
"When", ErrorMsg
"Where", ErrorMsg
"Deadline"]
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0..(forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
expected_labels forall a. Num a => a -> a -> a
- Int
1)] forall a b. (a -> b) -> a -> b
$ \Int
i -> forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ forall a. Parser a -> Text -> Bool
parsable (Text -> Parser Text
spacedText forall a b. (a -> b) -> a -> b
$ [Text]
expected_labels forall a. [a] -> Int -> a
!! Int
i) forall a b. (a -> b) -> a -> b
$ [Text]
tds forall a. [a] -> Int -> a
!! Int
i
forall (m :: * -> *) a. Monad m => a -> m a
return EventRow
EventRowHeader
eventRow1 :: Scraper' EventRow
eventRow1 :: ScraperT Text Identity EventRow
eventRow1 = do
(Text
sname, Text
url) <- forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m a
chroot (TagName
"td" TagName -> [AttributePredicate] -> Selector
@: [AttributeName
"rowspan" AttributeName -> ErrorMsg -> AttributePredicate
@= ErrorMsg
"2"]) forall a b. (a -> b) -> a -> b
$ ((,) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text Selector
"a" forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
ErrorMsg -> Selector -> ScraperT str m str
attr ErrorMsg
"href" Selector
"a")
Text
lname <- forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text (TagName
"td" TagName -> [AttributePredicate] -> Selector
@: [AttributeName
"colspan" AttributeName -> ErrorMsg -> AttributePredicate
@= ErrorMsg
"3"])
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text -> EventRow
EventRow1 Text
sname Text
url Text
lname
eventRow2 :: Scraper' EventRow
eventRow2 :: ScraperT Text Identity EventRow
eventRow2 = do
[Text]
tds <- forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m [str]
texts Selector
"td"
forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
tds forall a. Eq a => a -> a -> Bool
== Int
3
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Text -> Text -> EventRow
EventRow2 ([Text]
tds forall a. [a] -> Int -> a
!! Int
0) ([Text]
tds forall a. [a] -> Int -> a
!! Int
1) ([Text]
tds forall a. [a] -> Int -> a
!! Int
2)
rowsToEvents_noHeader :: [EventRow] -> Either ErrorMsg [Event]
[] = forall (m :: * -> *) a. Monad m => a -> m a
return []
rowsToEvents_noHeader ((EventRow1 Text
sn Text
url Text
ln) : (EventRow2 Text
when Text
wher Text
dl) : [EventRow]
rest) =
(:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Text
-> Text -> Text -> Text -> Text -> Text -> Either ErrorMsg Event
createEvent Text
sn Text
url Text
ln Text
when Text
wher Text
dl forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> [EventRow] -> Either ErrorMsg [Event]
rowsToEvents_noHeader [EventRow]
rest
rowsToEvents_noHeader [EventRow]
rows = forall a b. a -> Either a b
Left (ErrorMsg
"Error while parsing rows: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> ErrorMsg
show [EventRow]
rows)
urlBase :: Text
urlBase :: Text
urlBase = ErrorMsg -> Text
pack ErrorMsg
"http://wikicfp.com"
createEvent :: Text -> Text -> Text -> Text -> Text -> Text -> Either ErrorMsg Event
createEvent :: Text
-> Text -> Text -> Text -> Text -> Text -> Either ErrorMsg Event
createEvent Text
sname Text
url Text
lname Text
when Text
wher Text
dl = do
Maybe When
when' <- Text -> Either ErrorMsg (Maybe When)
parseWhen Text
when
Maybe Text
wher' <- Text -> Either ErrorMsg (Maybe Text)
parseWhere Text
wher
[Day]
dl' <- Text -> Either ErrorMsg [Day]
parseDeadlines Text
dl
forall (m :: * -> *) a. Monad m => a -> m a
return Event { eventShortName :: Text
eventShortName = Text
sname,
eventURL :: Text
eventURL = Text
urlBase forall a. Semigroup a => a -> a -> a
<> Text
url,
eventLongName :: Text
eventLongName = Text
lname,
eventWhen :: Maybe When
eventWhen = Maybe When
when',
eventWhere :: Maybe Text
eventWhere = Maybe Text
wher',
eventDeadlines :: [Day]
eventDeadlines = [Day]
dl'
}
parsable :: Parser a -> Text -> Bool
parsable :: forall a. Parser a -> Text -> Bool
parsable Parser a
p Text
t = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const Bool
False) (forall a b. a -> b -> a
const Bool
True) forall a b. (a -> b) -> a -> b
$ forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly Parser a
p Text
t
spacedText :: Text -> Parser Text
spacedText :: Text -> Parser Text
spacedText Text
expected = Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Text -> Parser Text
string Text
expected forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput
spacedText' :: String -> Parser Text
spacedText' :: ErrorMsg -> Parser Text
spacedText' = Text -> Parser Text
spacedText forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorMsg -> Text
pack
string' :: String -> Parser Text
string' :: ErrorMsg -> Parser Text
string' = Text -> Parser Text
string forall b c a. (b -> c) -> (a -> b) -> a -> c
. ErrorMsg -> Text
pack
parseWhen :: Text -> Either ErrorMsg (Maybe When)
parseWhen :: Text -> Either ErrorMsg (Maybe When)
parseWhen = forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly (Parser Text (Maybe When)
parserWhen forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) where
parserWhen :: Parser Text (Maybe When)
parserWhen = (ErrorMsg -> Parser Text
spacedText' ErrorMsg
"N/A" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text When
parserJustWhen)
parserJustWhen :: Parser Text When
parserJustWhen = Day -> Day -> When
When
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Char -> Parser Char
char Char
'-') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace)
parseWhere :: Text -> Either ErrorMsg (Maybe Text)
parseWhere :: Text -> Either ErrorMsg (Maybe Text)
parseWhere = forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly (Parser Text (Maybe Text)
parserWhere forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) where
parserWhere :: Parser Text (Maybe Text)
parserWhere = (ErrorMsg -> Parser Text
spacedText' ErrorMsg
"N/A" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a. Maybe a
Nothing) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (forall a. a -> Maybe a
Just forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
takeText)
parseDeadlines :: Text -> Either ErrorMsg [Day]
parseDeadlines :: Text -> Either ErrorMsg [Day]
parseDeadlines Text
input = forall a. Ord a => [a] -> [a]
sort forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. Parser a -> Text -> Either ErrorMsg a
parseOnly (Parser Text [Day]
parserDeadlines forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall t. Chunk t => Parser t ()
endOfInput) Text
input where
parserDeadlines :: Parser Text [Day]
parserDeadlines = Parser Text [Day]
strictParserDeadlines forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Text
takeText forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (m :: * -> *) a. Monad m => a -> m a
return [])
strictParserDeadlines :: Parser Text [Day]
strictParserDeadlines = do
Day
primary <- Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
Maybe Day
msecondary <- forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'(' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text Day
parserDay forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> Parser Char
char Char
')')
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall b a. b -> (a -> b) -> Maybe a -> b
maybe [Day
primary] (forall a. a -> [a] -> [a]
: [Day
primary]) Maybe Day
msecondary
parserDay :: Parser Day
parserDay :: Parser Text Day
parserDay = Parser Text Day
impl where
impl :: Parser Text Day
impl = do
Int
m <- Parser Text Int
parserMonth forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
Int
d <- forall a. Integral a => Parser a
decimal forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
optional forall a b. (a -> b) -> a -> b
$ Char -> Parser Char
char Char
',') forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
Year
y <- forall a. Integral a => Parser a
decimal
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Year -> Int -> Int -> Day
fromGregorian Year
y Int
m Int
d
parserMonth :: Parser Text Int
parserMonth = (ErrorMsg -> Parser Text
string' ErrorMsg
"Jan" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
1)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Feb" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
2)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Mar" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
3)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Apr" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
4)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"May" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
5)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Jun" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
6)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Jul" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
7)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Aug" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
8)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Sep" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
9)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Oct" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
10)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Nov" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
11)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (ErrorMsg -> Parser Text
string' ErrorMsg
"Dec" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a. Applicative f => a -> f a
pure Int
12)