module ScrapeReddit where
import Network.HTTP.Client (Manager)
import RIO
import qualified RIO.Text as Text
import RIO.Time (UTCTime, defaultTimeLocale, iso8601DateFormat, parseTimeM)
import Text.HTML.Scalpel
( Config (..),
Scraper,
anySelector,
attr,
chroot,
chroots,
defaultDecoder,
hasClass,
scrapeURLWithConfig,
text,
(@:),
)
data Link = Link
{ Link -> Text
title :: Text,
Link -> String
href :: String,
Link -> Maybe Int
currentScore :: Maybe Int,
:: Maybe Int,
Link -> Maybe UTCTime
date :: Maybe UTCTime
}
deriving (Link -> Link -> Bool
(Link -> Link -> Bool) -> (Link -> Link -> Bool) -> Eq Link
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Link -> Link -> Bool
$c/= :: Link -> Link -> Bool
== :: Link -> Link -> Bool
$c== :: Link -> Link -> Bool
Eq, Int -> Link -> ShowS
[Link] -> ShowS
Link -> String
(Int -> Link -> ShowS)
-> (Link -> String) -> ([Link] -> ShowS) -> Show Link
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Link] -> ShowS
$cshowList :: [Link] -> ShowS
show :: Link -> String
$cshow :: Link -> String
showsPrec :: Int -> Link -> ShowS
$cshowsPrec :: Int -> Link -> ShowS
Show, (forall x. Link -> Rep Link x)
-> (forall x. Rep Link x -> Link) -> Generic Link
forall x. Rep Link x -> Link
forall x. Link -> Rep Link x
forall a.
(forall x. a -> Rep a x) -> (forall x. Rep a x -> a) -> Generic a
$cto :: forall x. Rep Link x -> Link
$cfrom :: forall x. Link -> Rep Link x
Generic)
scrapeSubReddit :: Manager -> String -> IO (Maybe [Link])
scrapeSubReddit :: Manager -> String -> IO (Maybe [Link])
scrapeSubReddit Manager
manager String
subReddit =
Config Text -> String -> Scraper Text [Link] -> IO (Maybe [Link])
forall str a.
StringLike str =>
Config str -> String -> Scraper str a -> IO (Maybe a)
scrapeURLWithConfig
(Config :: forall str. Decoder str -> Maybe Manager -> Config str
Config {decoder :: Decoder Text
decoder = Decoder Text
forall str. StringLike str => Decoder str
defaultDecoder, manager :: Maybe Manager
manager = Manager -> Maybe Manager
forall a. a -> Maybe a
Just Manager
manager})
([String] -> String
forall a. Monoid a => [a] -> a
mconcat [String
"https://old.reddit.com/r/", String
subReddit, String
"/"])
Scraper Text [Link]
links
links :: Scraper Text [Link]
links :: Scraper Text [Link]
links = Selector -> ScraperT Text Identity Link -> Scraper Text [Link]
forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m [a]
chroots (TagName
"div" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"thing", String -> AttributePredicate
hasClass String
"link"]) ScraperT Text Identity Link
link'
link' :: Scraper Text Link
link' :: ScraperT Text Identity Link
link' = do
Text
title <- Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text (Selector -> ScraperT Text Identity Text)
-> Selector -> ScraperT Text Identity Text
forall a b. (a -> b) -> a -> b
$ TagName
"a" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"title"]
String
href <- Text -> String
Text.unpack (Text -> String)
-> ScraperT Text Identity Text -> ScraperT Text Identity String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
String -> Selector -> ScraperT str m str
attr String
"href" (TagName
"a" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"title"])
Maybe Int
currentScore <- (Text -> String
Text.unpack (Text -> String) -> (String -> Maybe Int) -> Text -> Maybe Int
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe) (Text -> Maybe Int)
-> ScraperT Text Identity Text
-> ScraperT Text Identity (Maybe Int)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
String -> Selector -> ScraperT str m str
attr String
"title" (TagName
"div" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"score"])
Maybe Int
comments <- Selector
-> ScraperT Text Identity (Maybe Int)
-> ScraperT Text Identity (Maybe Int)
forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m a
chroot (TagName
"a" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"comments"]) ScraperT Text Identity (Maybe Int)
commentNumber
Maybe UTCTime
date <- Selector
-> ScraperT Text Identity (Maybe UTCTime)
-> ScraperT Text Identity (Maybe UTCTime)
forall str (m :: * -> *) a.
(StringLike str, Monad m) =>
Selector -> ScraperT str m a -> ScraperT str m a
chroot (TagName
"time" TagName -> [AttributePredicate] -> Selector
@: [String -> AttributePredicate
hasClass String
"live-timestamp"]) ScraperT Text Identity (Maybe UTCTime)
dateTimeFromTime
pure $ Link :: Text -> String -> Maybe Int -> Maybe Int -> Maybe UTCTime -> Link
Link {Text
title :: Text
$sel:title:Link :: Text
title, String
href :: String
$sel:href:Link :: String
href, Maybe Int
currentScore :: Maybe Int
$sel:currentScore:Link :: Maybe Int
currentScore, Maybe Int
comments :: Maybe Int
$sel:comments:Link :: Maybe Int
comments, Maybe UTCTime
date :: Maybe UTCTime
$sel:date:Link :: Maybe UTCTime
date}
commentNumber :: Scraper Text (Maybe Int)
= do
String
commentLinkText <- Text -> String
Text.unpack (Text -> String)
-> ScraperT Text Identity Text -> ScraperT Text Identity String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(StringLike str, Monad m) =>
Selector -> ScraperT str m str
text Selector
anySelector
case String -> [String]
words String
commentLinkText of
[String
numberText, String
_comments] -> Maybe Int -> ScraperT Text Identity (Maybe Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Int -> ScraperT Text Identity (Maybe Int))
-> Maybe Int -> ScraperT Text Identity (Maybe Int)
forall a b. (a -> b) -> a -> b
$ String -> Maybe Int
forall a. Read a => String -> Maybe a
readMaybe String
numberText
[String]
_anythingElse -> Maybe Int -> ScraperT Text Identity (Maybe Int)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe Int
forall a. Maybe a
Nothing
dateTimeFromTime :: Scraper Text (Maybe UTCTime)
dateTimeFromTime :: ScraperT Text Identity (Maybe UTCTime)
dateTimeFromTime = do
String
timeString <- Text -> String
Text.unpack (Text -> String)
-> ScraperT Text Identity Text -> ScraperT Text Identity String
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Selector -> ScraperT Text Identity Text
forall str (m :: * -> *).
(Show str, StringLike str, Monad m) =>
String -> Selector -> ScraperT str m str
attr String
"datetime" Selector
anySelector
pure $ Bool -> TimeLocale -> String -> String -> Maybe UTCTime
forall (m :: * -> *) t.
(MonadFail m, ParseTime t) =>
Bool -> TimeLocale -> String -> String -> m t
parseTimeM Bool
True TimeLocale
defaultTimeLocale String
dateFormat String
timeString
dateFormat :: String
dateFormat :: String
dateFormat = Maybe String -> String
iso8601DateFormat (Maybe String -> String) -> Maybe String -> String
forall a b. (a -> b) -> a -> b
$ String -> Maybe String
forall a. a -> Maybe a
Just String
"%H:%M:%S+00:00"