module Chez.Grater.Scraper where

import Chez.Grater.Internal.Prelude

import Chez.Grater.Scraper.Types
  ( IngredientScraper(..), ScrapeError(..), ScrapeMetaWrapper(..), ScrapedRecipeName(..)
  , Scrapers(..), SiteName(..), StepScraper(..), ScrapedIngredient, ScrapedStep, title
  )
import Network.HTTP.Client (Manager)
import Network.URI (URI, uriAuthority, uriRegName)
import qualified Data.HashMap.Strict as HashMap
import qualified Data.Text as Text
import qualified Text.HTML.Scalpel as Scalpel

scrape
  :: (ScrapedRecipeName -> a)
  -> ([ScrapedIngredient] -> Either Text [b])
  -> ([ScrapedStep] -> Either Text [c])
  -> Scrapers -> Manager -> URI -> IO (a, [b], [c], ScrapeMetaWrapper)
scrape :: (ScrapedRecipeName -> a)
-> ([ScrapedIngredient] -> Either Text [b])
-> ([ScrapedStep] -> Either Text [c])
-> Scrapers
-> Manager
-> URI
-> IO (a, [b], [c], ScrapeMetaWrapper)
scrape ScrapedRecipeName -> a
mkName [ScrapedIngredient] -> Either Text [b]
runIngredientParser [ScrapedStep] -> Either Text [c]
runStepParser Scrapers
scrapers Manager
manager URI
uri = do
  let cfg :: Config Text
cfg = Decoder Text -> Maybe Manager -> Config Text
forall str. Decoder str -> Maybe Manager -> Config str
Scalpel.Config Decoder Text
forall str. StringLike str => Decoder str
Scalpel.defaultDecoder (Manager -> Maybe Manager
forall a. a -> Maybe a
Just Manager
manager)
  [Tag Text]
tags <- Config Text -> URL -> IO [Tag Text]
forall str. StringLike str => Config str -> URL -> IO [Tag str]
Scalpel.fetchTagsWithConfig Config Text
cfg (URI -> URL
forall a. Show a => a -> URL
show URI
uri)
  let domainMay :: Maybe SiteName
domainMay = Text -> SiteName
SiteName (Text -> SiteName) -> (URIAuth -> Text) -> URIAuth -> SiteName
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text -> Text -> Text
Text.replace Text
"www." Text
"" (Text -> Text) -> (URIAuth -> Text) -> URIAuth -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URL -> Text
Text.pack (URL -> Text) -> (URIAuth -> URL) -> URIAuth -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. URIAuth -> URL
uriRegName (URIAuth -> SiteName) -> Maybe URIAuth -> Maybe SiteName
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> URI -> Maybe URIAuth
uriAuthority URI
uri
      name :: ScrapedRecipeName
name = ScrapedRecipeName -> Maybe ScrapedRecipeName -> ScrapedRecipeName
forall a. a -> Maybe a -> a
fromMaybe (Text -> ScrapedRecipeName
ScrapedRecipeName Text
"Untitled") (Maybe ScrapedRecipeName -> ScrapedRecipeName)
-> Maybe ScrapedRecipeName -> ScrapedRecipeName
forall a b. (a -> b) -> a -> b
$ Scraper Text ScrapedRecipeName
-> [Tag Text] -> Maybe ScrapedRecipeName
forall str a.
StringLike str =>
Scraper str a -> [Tag str] -> Maybe a
Scalpel.scrape Scraper Text ScrapedRecipeName
title [Tag Text]
tags

      runScraper :: forall a b. ([a] -> Either Text [b]) -> Scalpel.Scraper Text [a] -> Maybe [b]
      runScraper :: ([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b]
runScraper [a] -> Either Text [b]
parser Scraper Text [a]
scraper = (Text -> Maybe [b])
-> ([b] -> Maybe [b]) -> Either Text [b] -> Maybe [b]
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Maybe [b] -> Text -> Maybe [b]
forall a b. a -> b -> a
const Maybe [b]
forall a. Maybe a
Nothing) [b] -> Maybe [b]
forall a. a -> Maybe a
Just (Either Text [b] -> Maybe [b])
-> ([a] -> Either Text [b]) -> [a] -> Maybe [b]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Either Text [b]
parser ([a] -> Maybe [b]) -> Maybe [a] -> Maybe [b]
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Scraper Text [a] -> [Tag Text] -> Maybe [a]
forall str a.
StringLike str =>
Scraper str a -> [Tag str] -> Maybe a
Scalpel.scrape Scraper Text [a]
scraper [Tag Text]
tags

      goIngredient :: IngredientScraper -> Maybe ([b], ScrapeMeta ScrapedIngredient)
goIngredient IngredientScraper {Scraper Text Bool
Scraper Text [ScrapedIngredient]
ScrapeMeta ScrapedIngredient
ingredientScraperRun :: IngredientScraper -> Scraper Text [ScrapedIngredient]
ingredientScraperTest :: IngredientScraper -> Scraper Text Bool
ingredientScraperMeta :: IngredientScraper -> ScrapeMeta ScrapedIngredient
ingredientScraperRun :: Scraper Text [ScrapedIngredient]
ingredientScraperTest :: Scraper Text Bool
ingredientScraperMeta :: ScrapeMeta ScrapedIngredient
..} = case Scraper Text Bool -> [Tag Text] -> Maybe Bool
forall str a.
StringLike str =>
Scraper str a -> [Tag str] -> Maybe a
Scalpel.scrape Scraper Text Bool
ingredientScraperTest [Tag Text]
tags of
        Just Bool
True -> (,ScrapeMeta ScrapedIngredient
ingredientScraperMeta) ([b] -> ([b], ScrapeMeta ScrapedIngredient))
-> Maybe [b] -> Maybe ([b], ScrapeMeta ScrapedIngredient)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ([ScrapedIngredient] -> Either Text [b])
-> Scraper Text [ScrapedIngredient] -> Maybe [b]
forall a b.
([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b]
runScraper [ScrapedIngredient] -> Either Text [b]
runIngredientParser Scraper Text [ScrapedIngredient]
ingredientScraperRun
        Maybe Bool
_ -> Maybe ([b], ScrapeMeta ScrapedIngredient)
forall a. Maybe a
Nothing
      goStep :: StepScraper -> Maybe ([c], ScrapeMeta ScrapedStep)
goStep StepScraper {Scraper Text Bool
Scraper Text [ScrapedStep]
ScrapeMeta ScrapedStep
stepScraperRun :: StepScraper -> Scraper Text [ScrapedStep]
stepScraperTest :: StepScraper -> Scraper Text Bool
stepScraperMeta :: StepScraper -> ScrapeMeta ScrapedStep
stepScraperRun :: Scraper Text [ScrapedStep]
stepScraperTest :: Scraper Text Bool
stepScraperMeta :: ScrapeMeta ScrapedStep
..} = case Scraper Text Bool -> [Tag Text] -> Maybe Bool
forall str a.
StringLike str =>
Scraper str a -> [Tag str] -> Maybe a
Scalpel.scrape Scraper Text Bool
stepScraperTest [Tag Text]
tags of
        Just Bool
True -> (,ScrapeMeta ScrapedStep
stepScraperMeta) ([c] -> ([c], ScrapeMeta ScrapedStep))
-> Maybe [c] -> Maybe ([c], ScrapeMeta ScrapedStep)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ([ScrapedStep] -> Either Text [c])
-> Scraper Text [ScrapedStep] -> Maybe [c]
forall a b.
([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b]
runScraper [ScrapedStep] -> Either Text [c]
runStepParser Scraper Text [ScrapedStep]
stepScraperRun
        Maybe Bool
_ -> Maybe ([c], ScrapeMeta ScrapedStep)
forall a. Maybe a
Nothing

  ([b]
ingredients, ScrapeMeta ScrapedIngredient
ingredientMeta) <- case (SiteName
 -> HashMap SiteName IngredientScraper -> Maybe IngredientScraper)
-> HashMap SiteName IngredientScraper
-> SiteName
-> Maybe IngredientScraper
forall a b c. (a -> b -> c) -> b -> a -> c
flip SiteName
-> HashMap SiteName IngredientScraper -> Maybe IngredientScraper
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup (Scrapers -> HashMap SiteName IngredientScraper
scrapersIngredientBySite Scrapers
scrapers) (SiteName -> Maybe IngredientScraper)
-> Maybe SiteName -> Maybe IngredientScraper
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Maybe SiteName
domainMay of
    Just IngredientScraper {Scraper Text Bool
Scraper Text [ScrapedIngredient]
ScrapeMeta ScrapedIngredient
ingredientScraperRun :: Scraper Text [ScrapedIngredient]
ingredientScraperTest :: Scraper Text Bool
ingredientScraperMeta :: ScrapeMeta ScrapedIngredient
ingredientScraperRun :: IngredientScraper -> Scraper Text [ScrapedIngredient]
ingredientScraperTest :: IngredientScraper -> Scraper Text Bool
ingredientScraperMeta :: IngredientScraper -> ScrapeMeta ScrapedIngredient
..} -> IO ([b], ScrapeMeta ScrapedIngredient)
-> ([b] -> IO ([b], ScrapeMeta ScrapedIngredient))
-> Maybe [b]
-> IO ([b], ScrapeMeta ScrapedIngredient)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient)
forall e a. Exception e => e -> IO a
throwIO (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient))
-> ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient)
forall a b. (a -> b) -> a -> b
$ Text -> ScrapeError
ScrapeError Text
"Failed to scrape known URL") (([b], ScrapeMeta ScrapedIngredient)
-> IO ([b], ScrapeMeta ScrapedIngredient)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (([b], ScrapeMeta ScrapedIngredient)
 -> IO ([b], ScrapeMeta ScrapedIngredient))
-> ([b] -> ([b], ScrapeMeta ScrapedIngredient))
-> [b]
-> IO ([b], ScrapeMeta ScrapedIngredient)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (,ScrapeMeta ScrapedIngredient
ingredientScraperMeta)) (Maybe [b] -> IO ([b], ScrapeMeta ScrapedIngredient))
-> Maybe [b] -> IO ([b], ScrapeMeta ScrapedIngredient)
forall a b. (a -> b) -> a -> b
$
      ([ScrapedIngredient] -> Either Text [b])
-> Scraper Text [ScrapedIngredient] -> Maybe [b]
forall a b.
([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b]
runScraper [ScrapedIngredient] -> Either Text [b]
runIngredientParser Scraper Text [ScrapedIngredient]
ingredientScraperRun
    Maybe IngredientScraper
Nothing -> IO ([b], ScrapeMeta ScrapedIngredient)
-> (([b], ScrapeMeta ScrapedIngredient)
    -> IO ([b], ScrapeMeta ScrapedIngredient))
-> Maybe ([b], ScrapeMeta ScrapedIngredient)
-> IO ([b], ScrapeMeta ScrapedIngredient)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient)
forall e a. Exception e => e -> IO a
throwIO (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient))
-> ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient)
forall a b. (a -> b) -> a -> b
$ Text -> ScrapeError
ScrapeError Text
"Failed to scrape URL from defaults") ([b], ScrapeMeta ScrapedIngredient)
-> IO ([b], ScrapeMeta ScrapedIngredient)
forall (f :: * -> *) a. Applicative f => a -> f a
pure
     (Maybe ([b], ScrapeMeta ScrapedIngredient)
 -> IO ([b], ScrapeMeta ScrapedIngredient))
-> (Scrapers -> Maybe ([b], ScrapeMeta ScrapedIngredient))
-> Scrapers
-> IO ([b], ScrapeMeta ScrapedIngredient)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [([b], ScrapeMeta ScrapedIngredient)]
-> Maybe ([b], ScrapeMeta ScrapedIngredient)
forall a. [a] -> Maybe a
lastMay
     ([([b], ScrapeMeta ScrapedIngredient)]
 -> Maybe ([b], ScrapeMeta ScrapedIngredient))
-> (Scrapers -> [([b], ScrapeMeta ScrapedIngredient)])
-> Scrapers
-> Maybe ([b], ScrapeMeta ScrapedIngredient)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (([b], ScrapeMeta ScrapedIngredient) -> Int)
-> [([b], ScrapeMeta ScrapedIngredient)]
-> [([b], ScrapeMeta ScrapedIngredient)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn ([b] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([b] -> Int)
-> (([b], ScrapeMeta ScrapedIngredient) -> [b])
-> ([b], ScrapeMeta ScrapedIngredient)
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([b], ScrapeMeta ScrapedIngredient) -> [b]
forall a b. (a, b) -> a
fst)
     ([([b], ScrapeMeta ScrapedIngredient)]
 -> [([b], ScrapeMeta ScrapedIngredient)])
-> (Scrapers -> [([b], ScrapeMeta ScrapedIngredient)])
-> Scrapers
-> [([b], ScrapeMeta ScrapedIngredient)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (IngredientScraper -> Maybe ([b], ScrapeMeta ScrapedIngredient))
-> [IngredientScraper] -> [([b], ScrapeMeta ScrapedIngredient)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe IngredientScraper -> Maybe ([b], ScrapeMeta ScrapedIngredient)
goIngredient
     ([IngredientScraper] -> [([b], ScrapeMeta ScrapedIngredient)])
-> (Scrapers -> [IngredientScraper])
-> Scrapers
-> [([b], ScrapeMeta ScrapedIngredient)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Scrapers -> [IngredientScraper]
scrapersIngredients
     (Scrapers -> IO ([b], ScrapeMeta ScrapedIngredient))
-> Scrapers -> IO ([b], ScrapeMeta ScrapedIngredient)
forall a b. (a -> b) -> a -> b
$ Scrapers
scrapers
  Maybe ([c], ScrapeMeta ScrapedStep)
stepsMay <- case (SiteName -> HashMap SiteName StepScraper -> Maybe StepScraper)
-> HashMap SiteName StepScraper -> SiteName -> Maybe StepScraper
forall a b c. (a -> b -> c) -> b -> a -> c
flip SiteName -> HashMap SiteName StepScraper -> Maybe StepScraper
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
HashMap.lookup (Scrapers -> HashMap SiteName StepScraper
scrapersStepBySite Scrapers
scrapers) (SiteName -> Maybe StepScraper)
-> Maybe SiteName -> Maybe StepScraper
forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< Maybe SiteName
domainMay of
    Just StepScraper {Scraper Text Bool
Scraper Text [ScrapedStep]
ScrapeMeta ScrapedStep
stepScraperRun :: Scraper Text [ScrapedStep]
stepScraperTest :: Scraper Text Bool
stepScraperMeta :: ScrapeMeta ScrapedStep
stepScraperRun :: StepScraper -> Scraper Text [ScrapedStep]
stepScraperTest :: StepScraper -> Scraper Text Bool
stepScraperMeta :: StepScraper -> ScrapeMeta ScrapedStep
..} -> Maybe ([c], ScrapeMeta ScrapedStep)
-> IO (Maybe ([c], ScrapeMeta ScrapedStep))
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe ([c], ScrapeMeta ScrapedStep)
 -> IO (Maybe ([c], ScrapeMeta ScrapedStep)))
-> (Scraper Text [ScrapedStep]
    -> Maybe ([c], ScrapeMeta ScrapedStep))
-> Scraper Text [ScrapedStep]
-> IO (Maybe ([c], ScrapeMeta ScrapedStep))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([c] -> ([c], ScrapeMeta ScrapedStep))
-> Maybe [c] -> Maybe ([c], ScrapeMeta ScrapedStep)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (,ScrapeMeta ScrapedStep
stepScraperMeta) (Maybe [c] -> Maybe ([c], ScrapeMeta ScrapedStep))
-> (Scraper Text [ScrapedStep] -> Maybe [c])
-> Scraper Text [ScrapedStep]
-> Maybe ([c], ScrapeMeta ScrapedStep)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([ScrapedStep] -> Either Text [c])
-> Scraper Text [ScrapedStep] -> Maybe [c]
forall a b.
([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b]
runScraper [ScrapedStep] -> Either Text [c]
runStepParser (Scraper Text [ScrapedStep]
 -> IO (Maybe ([c], ScrapeMeta ScrapedStep)))
-> Scraper Text [ScrapedStep]
-> IO (Maybe ([c], ScrapeMeta ScrapedStep))
forall a b. (a -> b) -> a -> b
$ Scraper Text [ScrapedStep]
stepScraperRun
    Maybe StepScraper
Nothing -> Maybe ([c], ScrapeMeta ScrapedStep)
-> IO (Maybe ([c], ScrapeMeta ScrapedStep))
forall (f :: * -> *) a. Applicative f => a -> f a
pure
      (Maybe ([c], ScrapeMeta ScrapedStep)
 -> IO (Maybe ([c], ScrapeMeta ScrapedStep)))
-> (Scrapers -> Maybe ([c], ScrapeMeta ScrapedStep))
-> Scrapers
-> IO (Maybe ([c], ScrapeMeta ScrapedStep))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [([c], ScrapeMeta ScrapedStep)]
-> Maybe ([c], ScrapeMeta ScrapedStep)
forall a. [a] -> Maybe a
lastMay
      ([([c], ScrapeMeta ScrapedStep)]
 -> Maybe ([c], ScrapeMeta ScrapedStep))
-> (Scrapers -> [([c], ScrapeMeta ScrapedStep)])
-> Scrapers
-> Maybe ([c], ScrapeMeta ScrapedStep)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (([c], ScrapeMeta ScrapedStep) -> Int)
-> [([c], ScrapeMeta ScrapedStep)]
-> [([c], ScrapeMeta ScrapedStep)]
forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn ([c] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length ([c] -> Int)
-> (([c], ScrapeMeta ScrapedStep) -> [c])
-> ([c], ScrapeMeta ScrapedStep)
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([c], ScrapeMeta ScrapedStep) -> [c]
forall a b. (a, b) -> a
fst)
      ([([c], ScrapeMeta ScrapedStep)]
 -> [([c], ScrapeMeta ScrapedStep)])
-> (Scrapers -> [([c], ScrapeMeta ScrapedStep)])
-> Scrapers
-> [([c], ScrapeMeta ScrapedStep)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (StepScraper -> Maybe ([c], ScrapeMeta ScrapedStep))
-> [StepScraper] -> [([c], ScrapeMeta ScrapedStep)]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe StepScraper -> Maybe ([c], ScrapeMeta ScrapedStep)
goStep
      ([StepScraper] -> [([c], ScrapeMeta ScrapedStep)])
-> (Scrapers -> [StepScraper])
-> Scrapers
-> [([c], ScrapeMeta ScrapedStep)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Scrapers -> [StepScraper]
scrapersSites
      (Scrapers -> IO (Maybe ([c], ScrapeMeta ScrapedStep)))
-> Scrapers -> IO (Maybe ([c], ScrapeMeta ScrapedStep))
forall a b. (a -> b) -> a -> b
$ Scrapers
scrapers
  case Maybe ([c], ScrapeMeta ScrapedStep)
stepsMay of
    Just ([c]
steps, ScrapeMeta ScrapedStep
stepMeta) | Bool -> Bool
not ([c] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [c]
steps) -> (a, [b], [c], ScrapeMetaWrapper)
-> IO (a, [b], [c], ScrapeMetaWrapper)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ScrapedRecipeName -> a
mkName ScrapedRecipeName
name, [b]
ingredients, [c]
steps, ScrapeMeta ScrapedIngredient
-> ScrapeMeta ScrapedStep -> ScrapeMetaWrapper
ScrapeMetaWrapperIngredientAndStep ScrapeMeta ScrapedIngredient
ingredientMeta ScrapeMeta ScrapedStep
stepMeta)
    Maybe ([c], ScrapeMeta ScrapedStep)
_ -> (a, [b], [c], ScrapeMetaWrapper)
-> IO (a, [b], [c], ScrapeMetaWrapper)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ScrapedRecipeName -> a
mkName ScrapedRecipeName
name, [b]
ingredients, [], ScrapeMeta ScrapedIngredient -> ScrapeMetaWrapper
ScrapeMetaWrapperIngredient ScrapeMeta ScrapedIngredient
ingredientMeta)