module Chez.Grater.Scraper where import Chez.Grater.Internal.Prelude import Chez.Grater.Scraper.Types ( IngredientScraper(..), ScrapeError(..), ScrapeMetaWrapper(..), ScrapedRecipeName(..) , Scrapers(..), SiteName(..), StepScraper(..), ScrapedIngredient, ScrapedStep, title ) import Network.HTTP.Client (Manager) import Network.URI (URI, uriAuthority, uriRegName) import qualified Data.HashMap.Strict as HashMap import qualified Data.Text as Text import qualified Text.HTML.Scalpel as Scalpel scrape :: (ScrapedRecipeName -> a) -> ([ScrapedIngredient] -> Either Text [b]) -> ([ScrapedStep] -> Either Text [c]) -> Scrapers -> Manager -> URI -> IO (a, [b], [c], ScrapeMetaWrapper) scrape :: (ScrapedRecipeName -> a) -> ([ScrapedIngredient] -> Either Text [b]) -> ([ScrapedStep] -> Either Text [c]) -> Scrapers -> Manager -> URI -> IO (a, [b], [c], ScrapeMetaWrapper) scrape ScrapedRecipeName -> a mkName [ScrapedIngredient] -> Either Text [b] runIngredientParser [ScrapedStep] -> Either Text [c] runStepParser Scrapers scrapers Manager manager URI uri = do let cfg :: Config Text cfg = Decoder Text -> Maybe Manager -> Config Text forall str. Decoder str -> Maybe Manager -> Config str Scalpel.Config Decoder Text forall str. StringLike str => Decoder str Scalpel.defaultDecoder (Manager -> Maybe Manager forall a. a -> Maybe a Just Manager manager) [Tag Text] tags <- Config Text -> URL -> IO [Tag Text] forall str. StringLike str => Config str -> URL -> IO [Tag str] Scalpel.fetchTagsWithConfig Config Text cfg (URI -> URL forall a. Show a => a -> URL show URI uri) let domainMay :: Maybe SiteName domainMay = Text -> SiteName SiteName (Text -> SiteName) -> (URIAuth -> Text) -> URIAuth -> SiteName forall b c a. (b -> c) -> (a -> b) -> a -> c . Text -> Text -> Text -> Text Text.replace Text "www." Text "" (Text -> Text) -> (URIAuth -> Text) -> URIAuth -> Text forall b c a. (b -> c) -> (a -> b) -> a -> c . URL -> Text Text.pack (URL -> Text) -> (URIAuth -> URL) -> URIAuth -> Text forall b c a. (b -> c) -> (a -> b) -> a -> c . URIAuth -> URL uriRegName (URIAuth -> SiteName) -> Maybe URIAuth -> Maybe SiteName forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> URI -> Maybe URIAuth uriAuthority URI uri name :: ScrapedRecipeName name = ScrapedRecipeName -> Maybe ScrapedRecipeName -> ScrapedRecipeName forall a. a -> Maybe a -> a fromMaybe (Text -> ScrapedRecipeName ScrapedRecipeName Text "Untitled") (Maybe ScrapedRecipeName -> ScrapedRecipeName) -> Maybe ScrapedRecipeName -> ScrapedRecipeName forall a b. (a -> b) -> a -> b $ Scraper Text ScrapedRecipeName -> [Tag Text] -> Maybe ScrapedRecipeName forall str a. StringLike str => Scraper str a -> [Tag str] -> Maybe a Scalpel.scrape Scraper Text ScrapedRecipeName title [Tag Text] tags runScraper :: forall a b. ([a] -> Either Text [b]) -> Scalpel.Scraper Text [a] -> Maybe [b] runScraper :: ([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b] runScraper [a] -> Either Text [b] parser Scraper Text [a] scraper = (Text -> Maybe [b]) -> ([b] -> Maybe [b]) -> Either Text [b] -> Maybe [b] forall a c b. (a -> c) -> (b -> c) -> Either a b -> c either (Maybe [b] -> Text -> Maybe [b] forall a b. a -> b -> a const Maybe [b] forall a. Maybe a Nothing) [b] -> Maybe [b] forall a. a -> Maybe a Just (Either Text [b] -> Maybe [b]) -> ([a] -> Either Text [b]) -> [a] -> Maybe [b] forall b c a. (b -> c) -> (a -> b) -> a -> c . [a] -> Either Text [b] parser ([a] -> Maybe [b]) -> Maybe [a] -> Maybe [b] forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b =<< Scraper Text [a] -> [Tag Text] -> Maybe [a] forall str a. StringLike str => Scraper str a -> [Tag str] -> Maybe a Scalpel.scrape Scraper Text [a] scraper [Tag Text] tags goIngredient :: IngredientScraper -> Maybe ([b], ScrapeMeta ScrapedIngredient) goIngredient IngredientScraper {Scraper Text Bool Scraper Text [ScrapedIngredient] ScrapeMeta ScrapedIngredient ingredientScraperRun :: IngredientScraper -> Scraper Text [ScrapedIngredient] ingredientScraperTest :: IngredientScraper -> Scraper Text Bool ingredientScraperMeta :: IngredientScraper -> ScrapeMeta ScrapedIngredient ingredientScraperRun :: Scraper Text [ScrapedIngredient] ingredientScraperTest :: Scraper Text Bool ingredientScraperMeta :: ScrapeMeta ScrapedIngredient ..} = case Scraper Text Bool -> [Tag Text] -> Maybe Bool forall str a. StringLike str => Scraper str a -> [Tag str] -> Maybe a Scalpel.scrape Scraper Text Bool ingredientScraperTest [Tag Text] tags of Just Bool True -> (,ScrapeMeta ScrapedIngredient ingredientScraperMeta) ([b] -> ([b], ScrapeMeta ScrapedIngredient)) -> Maybe [b] -> Maybe ([b], ScrapeMeta ScrapedIngredient) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> ([ScrapedIngredient] -> Either Text [b]) -> Scraper Text [ScrapedIngredient] -> Maybe [b] forall a b. ([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b] runScraper [ScrapedIngredient] -> Either Text [b] runIngredientParser Scraper Text [ScrapedIngredient] ingredientScraperRun Maybe Bool _ -> Maybe ([b], ScrapeMeta ScrapedIngredient) forall a. Maybe a Nothing goStep :: StepScraper -> Maybe ([c], ScrapeMeta ScrapedStep) goStep StepScraper {Scraper Text Bool Scraper Text [ScrapedStep] ScrapeMeta ScrapedStep stepScraperRun :: StepScraper -> Scraper Text [ScrapedStep] stepScraperTest :: StepScraper -> Scraper Text Bool stepScraperMeta :: StepScraper -> ScrapeMeta ScrapedStep stepScraperRun :: Scraper Text [ScrapedStep] stepScraperTest :: Scraper Text Bool stepScraperMeta :: ScrapeMeta ScrapedStep ..} = case Scraper Text Bool -> [Tag Text] -> Maybe Bool forall str a. StringLike str => Scraper str a -> [Tag str] -> Maybe a Scalpel.scrape Scraper Text Bool stepScraperTest [Tag Text] tags of Just Bool True -> (,ScrapeMeta ScrapedStep stepScraperMeta) ([c] -> ([c], ScrapeMeta ScrapedStep)) -> Maybe [c] -> Maybe ([c], ScrapeMeta ScrapedStep) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> ([ScrapedStep] -> Either Text [c]) -> Scraper Text [ScrapedStep] -> Maybe [c] forall a b. ([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b] runScraper [ScrapedStep] -> Either Text [c] runStepParser Scraper Text [ScrapedStep] stepScraperRun Maybe Bool _ -> Maybe ([c], ScrapeMeta ScrapedStep) forall a. Maybe a Nothing ([b] ingredients, ScrapeMeta ScrapedIngredient ingredientMeta) <- case (SiteName -> HashMap SiteName IngredientScraper -> Maybe IngredientScraper) -> HashMap SiteName IngredientScraper -> SiteName -> Maybe IngredientScraper forall a b c. (a -> b -> c) -> b -> a -> c flip SiteName -> HashMap SiteName IngredientScraper -> Maybe IngredientScraper forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v HashMap.lookup (Scrapers -> HashMap SiteName IngredientScraper scrapersIngredientBySite Scrapers scrapers) (SiteName -> Maybe IngredientScraper) -> Maybe SiteName -> Maybe IngredientScraper forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b =<< Maybe SiteName domainMay of Just IngredientScraper {Scraper Text Bool Scraper Text [ScrapedIngredient] ScrapeMeta ScrapedIngredient ingredientScraperRun :: Scraper Text [ScrapedIngredient] ingredientScraperTest :: Scraper Text Bool ingredientScraperMeta :: ScrapeMeta ScrapedIngredient ingredientScraperRun :: IngredientScraper -> Scraper Text [ScrapedIngredient] ingredientScraperTest :: IngredientScraper -> Scraper Text Bool ingredientScraperMeta :: IngredientScraper -> ScrapeMeta ScrapedIngredient ..} -> IO ([b], ScrapeMeta ScrapedIngredient) -> ([b] -> IO ([b], ScrapeMeta ScrapedIngredient)) -> Maybe [b] -> IO ([b], ScrapeMeta ScrapedIngredient) forall b a. b -> (a -> b) -> Maybe a -> b maybe (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient) forall e a. Exception e => e -> IO a throwIO (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient)) -> ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient) forall a b. (a -> b) -> a -> b $ Text -> ScrapeError ScrapeError Text "Failed to scrape known URL") (([b], ScrapeMeta ScrapedIngredient) -> IO ([b], ScrapeMeta ScrapedIngredient) forall (f :: * -> *) a. Applicative f => a -> f a pure (([b], ScrapeMeta ScrapedIngredient) -> IO ([b], ScrapeMeta ScrapedIngredient)) -> ([b] -> ([b], ScrapeMeta ScrapedIngredient)) -> [b] -> IO ([b], ScrapeMeta ScrapedIngredient) forall b c a. (b -> c) -> (a -> b) -> a -> c . (,ScrapeMeta ScrapedIngredient ingredientScraperMeta)) (Maybe [b] -> IO ([b], ScrapeMeta ScrapedIngredient)) -> Maybe [b] -> IO ([b], ScrapeMeta ScrapedIngredient) forall a b. (a -> b) -> a -> b $ ([ScrapedIngredient] -> Either Text [b]) -> Scraper Text [ScrapedIngredient] -> Maybe [b] forall a b. ([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b] runScraper [ScrapedIngredient] -> Either Text [b] runIngredientParser Scraper Text [ScrapedIngredient] ingredientScraperRun Maybe IngredientScraper Nothing -> IO ([b], ScrapeMeta ScrapedIngredient) -> (([b], ScrapeMeta ScrapedIngredient) -> IO ([b], ScrapeMeta ScrapedIngredient)) -> Maybe ([b], ScrapeMeta ScrapedIngredient) -> IO ([b], ScrapeMeta ScrapedIngredient) forall b a. b -> (a -> b) -> Maybe a -> b maybe (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient) forall e a. Exception e => e -> IO a throwIO (ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient)) -> ScrapeError -> IO ([b], ScrapeMeta ScrapedIngredient) forall a b. (a -> b) -> a -> b $ Text -> ScrapeError ScrapeError Text "Failed to scrape URL from defaults") ([b], ScrapeMeta ScrapedIngredient) -> IO ([b], ScrapeMeta ScrapedIngredient) forall (f :: * -> *) a. Applicative f => a -> f a pure (Maybe ([b], ScrapeMeta ScrapedIngredient) -> IO ([b], ScrapeMeta ScrapedIngredient)) -> (Scrapers -> Maybe ([b], ScrapeMeta ScrapedIngredient)) -> Scrapers -> IO ([b], ScrapeMeta ScrapedIngredient) forall b c a. (b -> c) -> (a -> b) -> a -> c . [([b], ScrapeMeta ScrapedIngredient)] -> Maybe ([b], ScrapeMeta ScrapedIngredient) forall a. [a] -> Maybe a lastMay ([([b], ScrapeMeta ScrapedIngredient)] -> Maybe ([b], ScrapeMeta ScrapedIngredient)) -> (Scrapers -> [([b], ScrapeMeta ScrapedIngredient)]) -> Scrapers -> Maybe ([b], ScrapeMeta ScrapedIngredient) forall b c a. (b -> c) -> (a -> b) -> a -> c . (([b], ScrapeMeta ScrapedIngredient) -> Int) -> [([b], ScrapeMeta ScrapedIngredient)] -> [([b], ScrapeMeta ScrapedIngredient)] forall b a. Ord b => (a -> b) -> [a] -> [a] sortOn ([b] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int length ([b] -> Int) -> (([b], ScrapeMeta ScrapedIngredient) -> [b]) -> ([b], ScrapeMeta ScrapedIngredient) -> Int forall b c a. (b -> c) -> (a -> b) -> a -> c . ([b], ScrapeMeta ScrapedIngredient) -> [b] forall a b. (a, b) -> a fst) ([([b], ScrapeMeta ScrapedIngredient)] -> [([b], ScrapeMeta ScrapedIngredient)]) -> (Scrapers -> [([b], ScrapeMeta ScrapedIngredient)]) -> Scrapers -> [([b], ScrapeMeta ScrapedIngredient)] forall b c a. (b -> c) -> (a -> b) -> a -> c . (IngredientScraper -> Maybe ([b], ScrapeMeta ScrapedIngredient)) -> [IngredientScraper] -> [([b], ScrapeMeta ScrapedIngredient)] forall a b. (a -> Maybe b) -> [a] -> [b] mapMaybe IngredientScraper -> Maybe ([b], ScrapeMeta ScrapedIngredient) goIngredient ([IngredientScraper] -> [([b], ScrapeMeta ScrapedIngredient)]) -> (Scrapers -> [IngredientScraper]) -> Scrapers -> [([b], ScrapeMeta ScrapedIngredient)] forall b c a. (b -> c) -> (a -> b) -> a -> c . Scrapers -> [IngredientScraper] scrapersIngredients (Scrapers -> IO ([b], ScrapeMeta ScrapedIngredient)) -> Scrapers -> IO ([b], ScrapeMeta ScrapedIngredient) forall a b. (a -> b) -> a -> b $ Scrapers scrapers Maybe ([c], ScrapeMeta ScrapedStep) stepsMay <- case (SiteName -> HashMap SiteName StepScraper -> Maybe StepScraper) -> HashMap SiteName StepScraper -> SiteName -> Maybe StepScraper forall a b c. (a -> b -> c) -> b -> a -> c flip SiteName -> HashMap SiteName StepScraper -> Maybe StepScraper forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v HashMap.lookup (Scrapers -> HashMap SiteName StepScraper scrapersStepBySite Scrapers scrapers) (SiteName -> Maybe StepScraper) -> Maybe SiteName -> Maybe StepScraper forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b =<< Maybe SiteName domainMay of Just StepScraper {Scraper Text Bool Scraper Text [ScrapedStep] ScrapeMeta ScrapedStep stepScraperRun :: Scraper Text [ScrapedStep] stepScraperTest :: Scraper Text Bool stepScraperMeta :: ScrapeMeta ScrapedStep stepScraperRun :: StepScraper -> Scraper Text [ScrapedStep] stepScraperTest :: StepScraper -> Scraper Text Bool stepScraperMeta :: StepScraper -> ScrapeMeta ScrapedStep ..} -> Maybe ([c], ScrapeMeta ScrapedStep) -> IO (Maybe ([c], ScrapeMeta ScrapedStep)) forall (f :: * -> *) a. Applicative f => a -> f a pure (Maybe ([c], ScrapeMeta ScrapedStep) -> IO (Maybe ([c], ScrapeMeta ScrapedStep))) -> (Scraper Text [ScrapedStep] -> Maybe ([c], ScrapeMeta ScrapedStep)) -> Scraper Text [ScrapedStep] -> IO (Maybe ([c], ScrapeMeta ScrapedStep)) forall b c a. (b -> c) -> (a -> b) -> a -> c . ([c] -> ([c], ScrapeMeta ScrapedStep)) -> Maybe [c] -> Maybe ([c], ScrapeMeta ScrapedStep) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b fmap (,ScrapeMeta ScrapedStep stepScraperMeta) (Maybe [c] -> Maybe ([c], ScrapeMeta ScrapedStep)) -> (Scraper Text [ScrapedStep] -> Maybe [c]) -> Scraper Text [ScrapedStep] -> Maybe ([c], ScrapeMeta ScrapedStep) forall b c a. (b -> c) -> (a -> b) -> a -> c . ([ScrapedStep] -> Either Text [c]) -> Scraper Text [ScrapedStep] -> Maybe [c] forall a b. ([a] -> Either Text [b]) -> Scraper Text [a] -> Maybe [b] runScraper [ScrapedStep] -> Either Text [c] runStepParser (Scraper Text [ScrapedStep] -> IO (Maybe ([c], ScrapeMeta ScrapedStep))) -> Scraper Text [ScrapedStep] -> IO (Maybe ([c], ScrapeMeta ScrapedStep)) forall a b. (a -> b) -> a -> b $ Scraper Text [ScrapedStep] stepScraperRun Maybe StepScraper Nothing -> Maybe ([c], ScrapeMeta ScrapedStep) -> IO (Maybe ([c], ScrapeMeta ScrapedStep)) forall (f :: * -> *) a. Applicative f => a -> f a pure (Maybe ([c], ScrapeMeta ScrapedStep) -> IO (Maybe ([c], ScrapeMeta ScrapedStep))) -> (Scrapers -> Maybe ([c], ScrapeMeta ScrapedStep)) -> Scrapers -> IO (Maybe ([c], ScrapeMeta ScrapedStep)) forall b c a. (b -> c) -> (a -> b) -> a -> c . [([c], ScrapeMeta ScrapedStep)] -> Maybe ([c], ScrapeMeta ScrapedStep) forall a. [a] -> Maybe a lastMay ([([c], ScrapeMeta ScrapedStep)] -> Maybe ([c], ScrapeMeta ScrapedStep)) -> (Scrapers -> [([c], ScrapeMeta ScrapedStep)]) -> Scrapers -> Maybe ([c], ScrapeMeta ScrapedStep) forall b c a. (b -> c) -> (a -> b) -> a -> c . (([c], ScrapeMeta ScrapedStep) -> Int) -> [([c], ScrapeMeta ScrapedStep)] -> [([c], ScrapeMeta ScrapedStep)] forall b a. Ord b => (a -> b) -> [a] -> [a] sortOn ([c] -> Int forall (t :: * -> *) a. Foldable t => t a -> Int length ([c] -> Int) -> (([c], ScrapeMeta ScrapedStep) -> [c]) -> ([c], ScrapeMeta ScrapedStep) -> Int forall b c a. (b -> c) -> (a -> b) -> a -> c . ([c], ScrapeMeta ScrapedStep) -> [c] forall a b. (a, b) -> a fst) ([([c], ScrapeMeta ScrapedStep)] -> [([c], ScrapeMeta ScrapedStep)]) -> (Scrapers -> [([c], ScrapeMeta ScrapedStep)]) -> Scrapers -> [([c], ScrapeMeta ScrapedStep)] forall b c a. (b -> c) -> (a -> b) -> a -> c . (StepScraper -> Maybe ([c], ScrapeMeta ScrapedStep)) -> [StepScraper] -> [([c], ScrapeMeta ScrapedStep)] forall a b. (a -> Maybe b) -> [a] -> [b] mapMaybe StepScraper -> Maybe ([c], ScrapeMeta ScrapedStep) goStep ([StepScraper] -> [([c], ScrapeMeta ScrapedStep)]) -> (Scrapers -> [StepScraper]) -> Scrapers -> [([c], ScrapeMeta ScrapedStep)] forall b c a. (b -> c) -> (a -> b) -> a -> c . Scrapers -> [StepScraper] scrapersSites (Scrapers -> IO (Maybe ([c], ScrapeMeta ScrapedStep))) -> Scrapers -> IO (Maybe ([c], ScrapeMeta ScrapedStep)) forall a b. (a -> b) -> a -> b $ Scrapers scrapers case Maybe ([c], ScrapeMeta ScrapedStep) stepsMay of Just ([c] steps, ScrapeMeta ScrapedStep stepMeta) | Bool -> Bool not ([c] -> Bool forall (t :: * -> *) a. Foldable t => t a -> Bool null [c] steps) -> (a, [b], [c], ScrapeMetaWrapper) -> IO (a, [b], [c], ScrapeMetaWrapper) forall (f :: * -> *) a. Applicative f => a -> f a pure (ScrapedRecipeName -> a mkName ScrapedRecipeName name, [b] ingredients, [c] steps, ScrapeMeta ScrapedIngredient -> ScrapeMeta ScrapedStep -> ScrapeMetaWrapper ScrapeMetaWrapperIngredientAndStep ScrapeMeta ScrapedIngredient ingredientMeta ScrapeMeta ScrapedStep stepMeta) Maybe ([c], ScrapeMeta ScrapedStep) _ -> (a, [b], [c], ScrapeMetaWrapper) -> IO (a, [b], [c], ScrapeMetaWrapper) forall (f :: * -> *) a. Applicative f => a -> f a pure (ScrapedRecipeName -> a mkName ScrapedRecipeName name, [b] ingredients, [], ScrapeMeta ScrapedIngredient -> ScrapeMetaWrapper ScrapeMetaWrapperIngredient ScrapeMeta ScrapedIngredient ingredientMeta)