-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/


-- | Scrape websites for changes
--   
--   This library scrapes websites and invokes callbacks when there are
--   changes, similar to an RSS reader. Sending an email by invoking
--   sendmail is a built-in callback mechanism. Of course, users can also
--   provide their own callback functions.
--   
--   Look at Network.ScrapeChanges for a full working example of how to
--   use "scrape-changes".
@package scrape-changes
@version 0.1.0.5

module Network.ScrapeChanges.Domain

-- | String encoded in the standard cron format
type CronScheduleString = String

-- | Url to scrape
type Url = String

-- | Body of the HTTP response
type HttpBody = Data.ByteString.Lazy.ByteString

-- | Function extracting Text from the HttpBody
type Scraper = HttpBody -> Text

-- | Codomain of Scraper
type Text = Data.Text.Lazy.Text

-- | Mail address for the provided MailConfig
data MailAddr
MailAddr :: Maybe Text -> String -> MailAddr

-- | Optional name for the given _mailAddr
[_mailAddrName] :: MailAddr -> Maybe Text

-- | Mail address
[_mailAddr] :: MailAddr -> String
mailAddrName :: Lens' MailAddr (Maybe Text)
mailAddr :: Lens' MailAddr String

data Mail
Mail :: MailAddr -> NonEmpty MailAddr -> Text -> Text -> Mail
[_mailFrom] :: Mail -> MailAddr
[_mailTo] :: Mail -> NonEmpty MailAddr
[_mailSubject] :: Mail -> Text
[_mailBody] :: Mail -> Text
mailTo :: Lens' Mail (NonEmpty MailAddr)
mailSubject :: Lens' Mail Text
mailFrom :: Lens' Mail MailAddr
mailBody :: Lens' Mail Text

data CallbackConfig

-- | Send a mail when there's changed data at your scrape target. This
--   needs sendmail to be configured correctly on the host your program
--   runs on.
MailConfig :: Mail -> CallbackConfig

-- | Just execute the provided function when there's changed data at your
--   scrape target.
OtherConfig :: (Text -> IO ()) -> CallbackConfig
_OtherConfig :: Prism' CallbackConfig (Text -> IO ())
_MailConfig :: Prism' CallbackConfig Mail
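
-- For illustration, a minimal sketch of constructing both callback
-- variants (the addresses are made up; OverloadedStrings and an import of
-- Data.List.NonEmpty ((:|)) are assumed):
--
--   mailCallback :: CallbackConfig
--   mailCallback = MailConfig $ Mail
--     { _mailFrom    = MailAddr (Just "Max Mustermann") "max@mustermann.de"
--     , _mailTo      = MailAddr Nothing "receiver@scrape-changes.com" :| []
--     , _mailSubject = "Scrape target changed"
--     , _mailBody    = ""
--     }
--
--   -- log the scraped Text instead of sending a mail
--   logCallback :: CallbackConfig
--   logCallback = OtherConfig (\text -> putStrLn ("changed: " ++ show text))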

data ScrapeConfig
ScrapeConfig :: String -> CallbackConfig -> ScrapeConfig

-- | The url to be called using GET
[_scrapeInfoUrl] :: ScrapeConfig -> String

-- | The callback config to be executed when something in _scrapeInfoUrl
--   has changed
[_scrapeInfoCallbackConfig] :: ScrapeConfig -> CallbackConfig
scrapeInfoUrl :: Lens' ScrapeConfig String
scrapeInfoCallbackConfig :: Lens' ScrapeConfig CallbackConfig

data ScrapeResult

-- | Signals that the last execution of the provided ScrapeConfig led to
--   execution of the CallbackConfig
CallbackCalled :: ScrapeResult

-- | Signals that the last execution of the provided ScrapeConfig didn't
--   lead to execution of the CallbackConfig
CallbackNotCalled :: ScrapeResult

data ScrapeSchedule
ScrapeSchedule :: CronScheduleString -> ScrapeConfig -> Scraper -> ScrapeSchedule
[_scrapeScheduleCron] :: ScrapeSchedule -> CronScheduleString
[_scrapeScheduleConfig] :: ScrapeSchedule -> ScrapeConfig
[_scrapeScheduleScraper] :: ScrapeSchedule -> Scraper
scrapeScheduleScraper :: Lens' ScrapeSchedule Scraper
scrapeScheduleCron :: Lens' ScrapeSchedule CronScheduleString
scrapeScheduleConfig :: Lens' ScrapeSchedule ScrapeConfig

data ValidationError
UrlNotAbsolute :: ValidationError
UrlProtocolInvalid :: ValidationError
MailConfigInvalidMailFromAddr :: String -> ValidationError
MailConfigInvalidMailToAddr :: String -> ValidationError
CronScheduleInvalid :: String -> ValidationError

type ScrapeValidation t = AccValidation [ValidationError] t

instance GHC.Classes.Eq Network.ScrapeChanges.Domain.ValidationError
instance GHC.Show.Show Network.ScrapeChanges.Domain.ValidationError
instance GHC.Show.Show Network.ScrapeChanges.Domain.ScrapeResult
instance Data.Hashable.Class.Hashable Network.ScrapeChanges.Domain.ScrapeConfig
instance GHC.Classes.Eq Network.ScrapeChanges.Domain.ScrapeConfig
instance GHC.Classes.Eq Network.ScrapeChanges.Domain.CallbackConfig
instance Data.Hashable.Class.Hashable Network.ScrapeChanges.Domain.Mail
instance GHC.Generics.Generic Network.ScrapeChanges.Domain.Mail
instance GHC.Classes.Eq Network.ScrapeChanges.Domain.Mail
instance GHC.Show.Show Network.ScrapeChanges.Domain.Mail
instance Data.Hashable.Class.Hashable Network.ScrapeChanges.Domain.MailAddr
instance GHC.Generics.Generic Network.ScrapeChanges.Domain.MailAddr
instance GHC.Classes.Eq Network.ScrapeChanges.Domain.MailAddr
instance GHC.Show.Show Network.ScrapeChanges.Domain.MailAddr


module Network.ScrapeChanges.Internal

-- | Helper constructor for ScrapeConfig containing a MailConfig callback.
mailScrapeConfig :: ScrapeInfoUrl -> MailFromAddr -> NonEmpty MailToAddr -> ScrapeConfig

-- | Helper constructor for ScrapeConfig containing an OtherConfig
--   callback.
otherScrapeConfig :: ScrapeInfoUrl -> (Text -> IO ()) -> ScrapeConfig
validateScrapeConfig :: ScrapeConfig -> ScrapeValidation ScrapeConfig
validateCronSchedule :: CronScheduleString -> ScrapeValidation CronScheduleString
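
-- For instance, a sketch of consuming the accumulating validation result
-- (checkCron is a hypothetical helper; AccSuccess and AccFailure are the
-- AccValidation constructors from the validation package):
--
--   checkCron :: CronScheduleString -> IO ()
--   checkCron c = case validateCronSchedule c of
--     AccSuccess s  -> putStrLn ("valid cron schedule: " ++ s)
--     AccFailure es -> putStrLn ("invalid cron schedule: " ++ show es)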
readLatestHash :: (Hashable t) => t -> IO (Maybe Hash)
saveHash :: (Hashable t) => t -> Hash -> IO ()
executeCallbackConfig :: ScrapeConfig -> Text -> IO ()
removeHash :: (Hashable t) => t -> IO ()
removeHashes :: IO ()
hash' :: Hashable t => t -> String
type ScrapeInfoUrl = String
type MailFromAddr = MailAddr
type MailToAddr = MailAddr
type Hash = String
loggerName :: String
httpExceptionHandler :: ScrapeConfig -> HttpException -> IO t


-- | Main module, reexports everything you need to use "scrape-changes".
--   
--   Full working example:
--   
--   {-# LANGUAGE OverloadedStrings #-}
--   
--   import Data.ByteString (isInfixOf)
--   import Data.ByteString.Lazy (ByteString, toStrict)
--   import Data.Text.Lazy.Encoding (decodeUtf8With)
--   import Data.Foldable (find)
--   import Data.Maybe (fromMaybe)
--   import Text.HTML.TagSoup (Tag(..), (~==), (~/=), parseTags, fromAttrib) 
--   import Data.List.NonEmpty (NonEmpty ((:|)))
--   import qualified System.Log.Logger as Logger
--   import qualified System.Log.Handler.Syslog as Syslog
--   import Data.Monoid ((<>))
--   import Control.Monad (forever)
--   import Network.ScrapeChanges
--   
--   main :: IO ()
--   main = do
--     _ <- configureLogging
--     _ <- (Logger.errorM thisLogger . show) `either` id $ scrapeChangesJobs
--     putStrLn "scrape-changes examples executable. Just look at the example source code."
--     -- |Simplest way to block the main thread forever. Good enough for the use cases of 'scrape-changes'
--     _ <- forever getLine
--     -- |Will never be executed in this case
--     clearAllScrapeConfigs
--   
--   -- |Google logo scrape function using the tagsoup library
--   scrapeGoogleLogo :: ByteString -> Text
--   scrapeGoogleLogo byteString =   
--     let tags                 = parseTags byteString
--         divWithBackgroundUrl = find (~/= TagClose ("div" :: ByteString)) $
--                                dropWhile (not . isDivWithBackgroundUrl) tags 
--         resultMaybe          = decodeUtf8Lenient . styleAttribContent <$> divWithBackgroundUrl
--     in fromMaybe "" resultMaybe 
--     where decodeUtf8Lenient = decodeUtf8With $ const . const . Just $ '?'
--           isDivWithBackgroundUrl t = 
--             let containsBackgroundUrl = isInfixOf "background:url" . toStrict
--             in t ~== TagOpen ("div" :: ByteString) [] && containsBackgroundUrl (styleAttribContent t)
--           styleAttribContent = fromAttrib "style"
--   
--   scrapeChangesJobs :: Either [(Url, [ValidationError])] (IO ())
--   scrapeChangesJobs = repeatScrapeAll [
--       -- Checks each minute for changes and sends a mail if there are any
--       ScrapeSchedule {
--         _scrapeScheduleCron = "* * * * *" -- std cron format
--       , _scrapeScheduleConfig = mailScrapeConfig "http://www.google.co.uk" -- to scrape
--                                                  (MailAddr Nothing "max@mustermann.de") -- from
--                                                  (MailAddr Nothing "receiver@scrape-changes.com" :| []) -- to
--       , _scrapeScheduleScraper = scrapeGoogleLogo --scrape function
--       }
--       -- Checks each minute for changes and notifies to syslog if there are any
--     , ScrapeSchedule {
--         _scrapeScheduleCron = "* * * * *"
--       , _scrapeScheduleConfig = otherScrapeConfig "http://www.google.co.uk" 
--                                                   (\text -> Logger.infoM thisLogger . show $ 
--                                                             "Change detected: " <> text)
--       , _scrapeScheduleScraper = scrapeGoogleLogo
--       }
--     ]
--   
--   configureLogging :: IO ()
--   configureLogging = do
--     syslogHandler <- Syslog.openlog thisLogger [] Syslog.DAEMON Logger.DEBUG
--     let logConfig = flip Logger.updateGlobalLogger (Logger.addHandler syslogHandler . Logger.setLevel Logger.DEBUG)
--     sequence_ $ logConfig <$> ["Network.ScrapeChanges", thisLogger]
--   
--   thisLogger :: String
--   thisLogger = "scrape-changes-examples"
--   
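--   A one-shot variant of the jobs above (a sketch; it reuses the
--   scrapeGoogleLogo scraper and the illustrative mail addresses, and
--   scrapeGoogleLogoOnce is a hypothetical name):
--   
--   scrapeGoogleLogoOnce :: Either [ValidationError] (IO ScrapeResult)
--   scrapeGoogleLogoOnce =
--     scrape (mailScrapeConfig "http://www.google.co.uk"
--                              (MailAddr Nothing "max@mustermann.de")
--                              (MailAddr Nothing "receiver@scrape-changes.com" :| []))
--            scrapeGoogleLogo
--   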
module Network.ScrapeChanges

-- | The basic scrape function. It fires a GET request against the url
--   defined within the provided ScrapeConfig. The response body is passed
--   to the provided Scraper. The resulting Text is used to determine
--   whether something has changed on the respective website. If so, the
--   callback configured in ScrapeConfig is executed and CallbackCalled is
--   returned. Otherwise CallbackNotCalled is returned.
scrape :: ScrapeConfig -> Scraper -> Either [ValidationError] (IO ScrapeResult)

-- | Repeat executing scrape by providing a CronScheduleString. The
--   returned IO action is non-blocking.
repeatScrape :: CronScheduleString -> ScrapeConfig -> Scraper -> Either [ValidationError] (IO ())

-- | Execute a list of ScrapeSchedule in parallel. If validation of any
--   ScrapeSchedule fails, a Left containing the ValidationError values
--   indexed by the corresponding Url is returned.
repeatScrapeAll :: [ScrapeSchedule] -> Either [(Url, [ValidationError])] (IO ())

-- | Execute a list of ScrapeConfig in sequence using scrape and collect
--   the results in an association list keyed by the respective Url.
scrapeAll :: [(ScrapeConfig, Scraper)] -> [(Url, Either [ValidationError] (IO ScrapeResult))]

-- | Helper constructor for ScrapeConfig containing a MailConfig callback.
mailScrapeConfig :: ScrapeInfoUrl -> MailFromAddr -> NonEmpty MailToAddr -> ScrapeConfig

-- | Helper constructor for ScrapeConfig containing an OtherConfig
--   callback.
otherScrapeConfig :: ScrapeInfoUrl -> (Text -> IO ()) -> ScrapeConfig

-- | Clear all mutable state associated with the provided ScrapeConfig
clearScrapeConfig :: ScrapeConfig -> IO ()

-- | Clear all mutable state ever used by "scrape-changes"
clearAllScrapeConfigs :: IO ()
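
-- For instance, scheduling a single job with repeatScrape instead of
-- repeatScrapeAll (a sketch; startJob is a hypothetical helper, and the
-- cron string runs the job every five minutes):
--
--   startJob :: ScrapeConfig -> Scraper -> IO ()
--   startJob scrapeConfig scraper =
--     -- collapse validation errors and the scheduled action into one IO ()
--     either (error . show) id $ repeatScrape "*/5 * * * *" scrapeConfig scraper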