{- This file is part of feed-collect. - - Written in 2015 by fr33domlover . - Date parser format strings written originally by koral - for the imm package and were copied here (imm is released as WTFPL). - - ♡ Copying is an act of love. Please copy, reuse and share. - - The author(s) have dedicated all copyright and related and neighboring - rights to this software to the public domain worldwide. This software is - distributed without any warranty. - - You should have received a copy of the CC0 Public Domain Dedication along - with this software. If not, see - . -} -- | = Intro -- -- This module allows you to run a loop which visits web feeds (RSS, Atom, -- etc.) periodically and reports new items using a function you provide. You -- can also insert control commands into the loop, such as adding and removing -- a feed, so that stopping and restarting it isn't required. -- -- Both http and https URLs are supported. -- -- The original use case which motivated the creation of this library is -- . -- -- = Running -- -- The 'run' function runs the loop, and it takes a collector callback function -- (as one of its parameters) to be called when new feed items are found. -- -- Here is a simple usage example. -- -- > import Data.Default.Class (def) -- > import Data.Time.Interval -- > import Data.Time.Units -- > import Web.Feed.Collect -- > -- > collect :: Label -> URL -> Feed -> Item -> IO () -- > collect label url feed item = do -- > putStrLn $ label ++ " : " ++ url -- > putStrLn "Got a new feed item!" -- > putStrLn $ showFeed feed -- > putStrLn $ showItem item -- > -- > logError :: Label -> Error -> IO () -- > logError l e = putStrLn $ l ++ " : " ++ show e -- > -- > feeds :: [(Label, Url)] -- > feeds = [("democ-now", "http://www.democracynow.org/democracynow.rss")] -- > -- > main :: IO () -- > main = run def -- > { wcCollect = collect -- > , wcLogError = logError -- > , wcVisitInterval = time (1 :: Minute) -- > , wcMaxItems = 3 -- > , wcFeeds = fromPairs feeds -- > } -- -- For quick testing, you can use one of the predefined collector functions. -- For example, there is 'collectorNull' which discards the feed items and does -- nothing (you can use it e.g. when testing success of HTTP requests or -- debugging the library itself), and 'collectorPretty' which writes a short -- nicely formatted entry to stdout for each new feed item. And there are more. -- -- If new items aren't being detected correctly, you can enable debugging -- through the 'wcDebug' field. You can get detailed logs of the detection -- process. You can @tail -f@ a log file from your terminal to watch new log -- entries get appended to it. -- -- = Using Control Commands -- -- Now let's see how to push control commands into the loop. The -- 'wcCommandQueue' field is an optional command queue for making changes -- while the watcher runs. In the example above we didn't provide one. Now -- let's provide a queue and use it. -- -- Suppose we are writing a program with a command-line interface. It watches -- news feeds in the background, and can take commands from the user at the -- same time on the command-line. Feeds can be added, removed, etc. without -- restarting the program. -- -- Assume we've written a function named @parseCommand@, which takes a line of -- user input and returns a command ready to push into the queue. The parsed -- commands are of type 'Command' and are created using functions like -- 'addFeed', 'removeFeed' and so on, which this module provides. Using our -- @parseCommand@ we can write the program like this: -- -- > import Control.Concurrent (forkIO) -- > import Data.Default.Class (def) -- > import Data.Time.Interval -- > import Data.Time.Units -- > import System.IO -- > import Web.Feed.Collect -- > -- > collect :: Label -> URL -> Feed -> Item -> IO () -- > collect = collectPretty -- > -- > feed :: FeedConfig -- > feed = mkFeed "fsf-news" "https://www.fsf.org/static/fsforg/rss/news.xml" -- > -- > parseCommand :: String -> Maybe Command -- > parseCommand line = {- ... - } -- > -- > main :: IO () -- > main = do -- > cqueue <- newCommandQueue -- > forkIO $ run def -- > { wcCollect = collect -- > , wcCommandQueue = Just cqueue -- > , wcFeeds = [feed] -- > } -- > let loop = do -- > line <- getLine -- > if line == "quit" -- > then putStrLn "Bye!" -- > else do -- > case parseCommand line of -- > Just cmd -> sendCommand cqueue cmd -- > Nothing -> putStrLn "Invalid input" -- > loop -- > loop module Web.Feed.Collect ( -- * Misc Types Label , Url , Error (..) -- * Specifying Feeds , FeedConfig () , fcLabel , fcUrl , fcActive , fcDebug , mkFeed , fromPairs -- * Watcher Configuration , WatcherConfig () , wcCollect , wcCollectMany , wcLogError , wcCommandQueue , wcVisitInterval , wcMaxItems , wcFeeds , wcDebug -- * Collectors , collectorNull , collectorPrint , collectorPretty , collectorLog -- * Running , run -- * Control Commands , CommandQueue () , newCommandQueue , sendCommand , sendCommands , Command () , addFeed , removeFeed , setFeedActive , setInterval , setMaxPerVisit -- * Utilities and Debugging , showFeed , showItem , DebugConfig (..) , setFeedDebug , setGeneralDebug ) where import Control.Concurrent (threadDelay) import Control.Concurrent.MVar import Control.Exception (catch) import Control.Monad (liftM, when) import Control.Monad.IO.Class (MonadIO, liftIO) import Data.Default.Class import Data.List (partition) import Data.Maybe (catMaybes, fromMaybe, listToMaybe) import Data.Time.Clock (UTCTime) import Data.Time.Format import Data.Time.Interval import Data.Time.LocalTime (getZonedTime, zonedTimeToUTC) import Data.Time.RFC2822 (parseTimeRFC2822) import Data.Time.RFC3339 (parseTimeRFC3339) import Data.Time.RFC822 (parseTimeRFC822) import Data.Time.Units (TimeUnit (..), Minute) import Network.HTTP.Client import Network.HTTP.Client.TLS (tlsManagerSettings) import System.IO import Text.Feed.Import (parseFeedString) import Text.Feed.Query import Text.Feed.Types (Item, Feed (..)) import Text.Show.Functions () import qualified Data.ByteString.Lazy.UTF8 as BU -- | A short name tag for a feed, for quick reference and internal use. type Label = String -- | An HTTP or HTTPS URL of a feed or a feed item. type Url = String -- | A control command sent to the feed watching loop, and affecting its -- behavior. data Command -- | Add a new feed to watch. = AddFeed FeedConfig -- | Remove a previously added feed. | RemoveFeed Label -- | Set whether a given feed should be watched (active) or not (inactive). | SetFeedActive Label Bool -- | Set the interval, in microseconds, between feed scans. | SetInterval Int -- | Set the maximal number of feed items to be collected per feed per -- scan. If more new items are found, they wait for the next scan. | SetMaxPerVisit Int -- | Enable or disable feed specific debug log. | SetFeedDebug Label Bool -- | Enable or disable general debug log. | SetGeneralDebug Bool -- | A queue of control commands for the 'run' loop to execute while running. -- -- Commands can be pushed concurrently from different threads, but there should -- be only one 'run' invocation reading from the queue (i.e. although this is -- unlikely anyway, don't pass the same queue to multiple concurrent 'run' -- calls). newtype CommandQueue = CommandQueue { cqMVar :: MVar [Command] } -- | Details of a news feed to be watched. Some of these are just initial -- settings which you can change while the watcher runs, using the command -- queue. -- -- You can use 'def' and override fields a needed. In that case you should -- specify at least the label and URL! Alternatively, use 'mkFeed' or -- 'fromPairs'. data FeedConfig = FeedConfig { -- | A short name tag by which you can refer to the feed. fcLabel :: Label -- | The HTTP or HTTPS URL of the feed. , fcUrl :: Url -- | Whether the feed should initially be active. Disabled feeds aren't -- visited (except once to remember the old feed items). You can enable -- and disable feeds later, while the watcher runs, using the command -- queue. , fcActive :: Bool -- | Whether per-feed debug logs should be written for this feed. These -- are detailed messages listing the results of each step of the new item -- detection process. You can later toggle this setting per feed using -- the command queue. , fcDebug :: Bool } instance Default FeedConfig where def = FeedConfig { fcLabel = "" , fcUrl = "" , fcActive = True , fcDebug = False } -- | Debugging options. Useful when new items aren't being detected as -- expected. The 'Default' instance has all debugging off. These are just the -- initial settings: you can change some of them while the feed watcher runs, -- using the command queue. -- -- Since debugging is expected to be used only during development, changes to -- the fields of this type will cause only a /micro/ version change in the -- package version. If you think some options here are useful for more than -- debugging, please contact the author. data DebugConfig = DebugConfig { -- | Whether to log a summary of the watcher cycle. The log lists for -- each iteration which feeds are active and which aren't, whether items -- were detected for the active feeds, and whether any command came -- through the command queue. dcDebugCycle :: Bool -- | File into which to write debug messages which aren't specific to a -- single feed. The cycle debug log (see 'dcDebugCycle') is written into -- this file. , dcGeneralLog :: FilePath -- | A function used to determine the detection debug log file for a -- given feed. , dcFeedLog :: Label -> FilePath } deriving Show instance Default DebugConfig where def = DebugConfig { dcDebugCycle = False , dcGeneralLog = "feed-debug.log" , dcFeedLog = \ l -> "feed-debug-" ++ l ++ ".log" } -- | Feed watcher behavior description. To create one, use 'def' and override -- fields as needed. data WatcherConfig m = WatcherConfig { -- | Collector, i.e. action to perform when receiving a feed item. wcCollect :: (Label -> Url -> Feed -> Item -> m ()) -- | Action to perform when receiving feed items. This is just a chance -- to provide an efficient shortcut instead of repeated use of -- 'wcCollect'. If there is no such shortcut, simply pass 'Nothing'. , wcCollectMany :: Maybe (Label -> Url -> Feed -> [Item] -> m ()) -- | Error logging action. It will be called when an error occurs while -- trying to download and parse a feed. Such an error doesn't cause -- anything to stop. Execution simply goes on to read the next feed, and -- will try the erronous feed again in the next round. , wcLogError :: (Label -> Error -> m ()) -- | A command queue you can use to change settings while the program -- runs. For example, you can add a new feed or change the interval -- between polls without relaunching 'run'. , wcCommandQueue :: Maybe CommandQueue -- | Time interval between visits of a watched feed. , wcVisitInterval :: TimeInterval -- | Maximal number of items to collect per visit (if more are available, -- they will be collected in the next visit). , wcMaxItems :: Int -- | List of feeds and their configuration details. , wcFeeds :: [FeedConfig] -- | Debugging options. Useful when new items aren't being detected as -- expected. , wcDebug :: DebugConfig } instance MonadIO m => Default (WatcherConfig m) where def = WatcherConfig { wcCollect = \ l u f i -> liftIO $ collectorPretty l u f i , wcCollectMany = Nothing , wcLogError = \ l e -> liftIO $ putStrLn $ l ++ " : " ++ show e , wcCommandQueue = Nothing , wcVisitInterval = time (1 :: Minute) , wcMaxItems = 3 , wcFeeds = [] , wcDebug = def } -- Since no item field is guaranteed to be provided, we need some way to attach -- a practically-mostly-unique ID to each feed item, so that we can remember -- the items we already collected. Item fields we can expect to be mostly -- unique are: -- -- * ID (this is meant to serve as a unique identifier) -- * Title (usually an item doesn't repeat a past item's title), -- * Date (usually you don't publish 2 items in the very same microsecond) -- * Summary (same idea as the title) -- -- The other fields are based on these, or are likely to be, or aren't expected -- to be unique. -- -- In the (hopefully very rare) case none of these is available, just assume we -- never saw this ID before. -- -- We won't be checking for cases the available fields change over time, e.g. -- suddenly feed items get their IDs published in the XML. If the need ever -- arises, code to handle this can be added. data ItemID = ByID String | ByTitle String | ByTime String | BySummary String | Unique deriving Show instance Eq ItemID where (ByID i) == (ByID j) = i == j (ByTitle i) == (ByTitle j) = i == j (ByTime i) == (ByTime j) = i == j (BySummary i) == (BySummary j) = i == j _ == _ = False -- | Information the feed watcher holds per feed while running. data FeedRecord = FeedRecord { feedName :: String -- A short identifier for easy reference , feedUrl :: String -- The feed's URL , feedOn :: Bool -- Whether the feed is active , feedPrevIDs :: [ItemID] -- Previously collected feed item IDs , feedUpdated :: Maybe UTCTime -- Last time a feed item was published , feedDebug :: Bool -- Whether to create detection debug logs , feedDebugLog :: FilePath -- Debug log file name } deriving Show data State = State { usecInterval :: Int -- Microseconds between polls , maxItemsPerVisit :: Int -- Per-feed items to collect per poll , records :: [FeedRecord] -- Per-feed state , stDebug :: Bool -- Whether to write general debug logs } deriving Show -- | An error occuring while reading from a news feed. data Error -- | Error while creating an HTTP request or receiving a response. = HttpError HttpException -- | Error while parsing the HTTP response body into feed content | FeedParsingFailed Url deriving Show -- | Initialize feed details from a feed label and its URL. mkFeed :: Label -> Url -> FeedConfig mkFeed l u = def { fcLabel = l, fcUrl = u } -- | Create a list of feed configs from a list of label-url pairs. fromPairs :: [(Label, Url)] -> [FeedConfig] fromPairs = map $ uncurry mkFeed showFeedKind :: Feed -> String showFeedKind (AtomFeed _) = "Atom" showFeedKind (RSSFeed _) = "RSS" showFeedKind (RSS1Feed _) = "RSS1" showFeedKind (XMLFeed _) = "XML" -- | A short one-line description of a feed. showFeed :: Feed -> String showFeed feed = unwords [kind, title, "by", author, "at", home] where kind = '(' : showFeedKind feed ++ ")" title = getFeedTitle feed author = fromMaybe none $ getFeedAuthor feed home = fromMaybe none $ getFeedHome feed none = "[?]" -- | A short one-line description of a feed item. showItem :: Item -> String showItem item = unwords [title, "by", author, "at", date] where title = fromMaybe none $ getItemTitle item author = fromMaybe none $ getItemAuthor item date = fromMaybe none $ getItemDate item none = "[?]" -- | An item collector which discards the item, i.e. does nothing. collectorNull :: Label -> Url -> Feed -> Item -> IO () collectorNull _ _ _ _ = return () -- | An item collector which prints feed and item fields to @stdout@. collectorPrint :: Label -> Url -> Feed -> Item -> IO () collectorPrint _label _url feed item = print feed >> print item -- | An item collector which prints short friendly feed and item descriptions -- to @stdout@. collectorPretty :: Label -> Url -> Feed -> Item -> IO () collectorPretty _label _url feed item = do putStrLn $ showFeed feed putStrLn $ showItem item -- | An item collector which writes friendly descriptions into a log file, -- determining the log file's name using the function given as the first -- argument (the feed label, i.e. second argument, is also used as the argument -- for that function). collectorLog :: (Label -> FilePath) -> Label -> Url -> Feed -> Item -> IO () collectorLog getPath label url feed item = withFile (getPath $ getFeedTitle feed) AppendMode $ \ h -> do hPutStrLn h $ label ++ " : " ++ url hPutStrLn h $ showFeed feed hPutStrLn h $ showItem item hPutChar h '\n' -- Maximal number of item IDs to remember maxNumIDs :: Int maxNumIDs = 200 -- Find the value in the first 'Just' available in a list of 'Maybe's findJust :: [Maybe a] -> Maybe a findJust = listToMaybe . catMaybes -- Using the available item fields, get the best hopefully-unique ID we can. itemID :: Item -> ItemID itemID i = fromMaybe Unique $ findJust [ fmap (ByID . snd) $ getItemId i , fmap ByTitle $ getItemTitle i , fmap ByTime $ getItemDate i , fmap BySummary $ getItemSummary i ] -- Parse and get an item's date. If not available, return 'Nothing'. -- The format examples may be partial, i.e. not demonstrate the formats fully. -- -- Since the availability of item date isn't guaranteed, use 2 possible -- representations. One, use the date if available. Two, otherwise, assume a -- timeless value which always compares in a way causing us to find new items -- to collect. itemTime :: Item -> Maybe UTCTime itemTime item = let mdate = getItemDate item dateParsers = -- Sun, 15 Nov 2015 02:45:26 -0200 fmap zonedTimeToUTC . parseTimeRFC2822 : -- 2015-11-15T02:45:26-02:00 fmap zonedTimeToUTC . parseTimeRFC3339 : -- 15 Nov 2015 02:45 -0200 fmap zonedTimeToUTC . parseTimeRFC822 : map (parseTimeM True defaultTimeLocale) [ "%a, %d %b %G %T" -- Sun, 15 Nov 2015 02:45:26 , "%Y-%m-%d" -- 2015-11-15 , "%e %b %Y" -- 15 Nov 2015 , "%a, %e %b %Y %k:%M:%S %z" -- Sun, 15 Nov 2015 2:45:26 -0200 , "%a, %e %b %Y %T %Z" -- Sun, 15 Nov 2015 02:45:26 -0200 ] results = maybe [] (\ date -> map ($ date) dateParsers) mdate in findJust results -- Check whether one 'ItemTime' is more recent than another, for the purpose of -- identifying new feed items. newerThan :: Maybe UTCTime -> Maybe UTCTime -> Bool (Just u) `newerThan` (Just v) = u > v _ `newerThan` _ = True showTime :: FormatTime t => t -> String showTime = formatTime defaultTimeLocale rfc822DateFormat -- Find recent items we haven't collected yet detectNewItems :: Int -> FeedRecord -> Feed -> ([Item], FeedRecord, IO ()) detectNewItems maxItems rec feed = let items = feedItems feed ids = map itemID items times = map itemTime items iids = zip3 items ids times new (_i, iid, t) = iid `notElem` feedPrevIDs rec && t `newerThan` feedUpdated rec iidsAllNew = filter new iids iids' = drop (length iidsAllNew - maxItems) iidsAllNew (items', ids', _times') = unzip3 iids' rec' = rec { feedPrevIDs = take maxNumIDs $ ids' ++ feedPrevIDs rec , feedUpdated = case iids' of (_i, _iid, t) : _ -> t [] -> feedUpdated rec } report = do let updated' = fmap showTime . feedUpdated prevIDs = feedPrevIDs rec prevIDsS = take 5 prevIDs itemsS = take maxItems items iidsS = take maxItems iids prevIDsFinal = feedPrevIDs rec' prevIDsFinalS = take 5 prevIDsFinal h <- openFile (feedDebugLog rec) AppendMode let line = hPutStrLn h nl = hPutChar h '\n' printIT (_, i, t) = line $ show t ++ " " ++ show i t <- getZonedTime line $ replicate 79 '-' line $ showTime t line $ replicate 79 '-' line $ "Label " ++ feedName rec line $ "URL " ++ feedUrl rec line "----------- (1) Before changes ------------" line $ "Active " ++ show (feedOn rec) ++ " (should be True!)" line $ "Updated " ++ fromMaybe "[?]" (updated' rec) line $ "At most " ++ show maxItems ++ " are reported per visit" line $ showFeed feed line $ show (length prevIDs) ++ " previous item IDs logged" nl line $ "Most recent " ++ show (length prevIDsS) ++ " are:" mapM_ (hPrint h) prevIDsS nl line "------------ (2) While running ------------" line $ "Feed has " ++ show (length items) ++ " items" nl line $ "First " ++ show maxItems ++ " from the top are:" mapM_ (line . showItem) itemsS nl line "Their computed times and IDs are:" mapM_ printIT iidsS nl line "Out of all feed items, the following have newly seen IDs:" mapM_ printIT $ filter (\ (_, i, _) -> i `notElem` prevIDs) iids nl line "Out of them, the following are also newer than last update:" mapM_ printIT iidsAllNew nl line "--------------- (3) Result ----------------" line "Out of them, the following have been collected:" mapM_ printIT iids' nl line $ "Updated " ++ fromMaybe "[?]" (updated' rec') line $ show (length prevIDsFinal) ++ " previous item IDs logged" nl line $ "Most recent " ++ show (length prevIDsFinalS) ++ " are:" mapM_ (hPrint h) prevIDsFinalS nl hClose h in (items', rec', report) fetchRaw :: Manager -> String -> IO (Either HttpException String) fetchRaw manager url = let action = do request <- parseUrl url response <- httpLbs request manager return $ Right $ BU.toString $ responseBody response handler e = return $ Left (e :: HttpException) in action `catch` handler -- Try to download a feed from its URL fetch :: Manager -> Url -> IO (Either Error Feed) fetch manager url = do ebody <- fetchRaw manager url return $ case ebody of Left err -> Left $ HttpError err Right body -> case parseFeedString body of Just feed -> Right feed Nothing -> Left $ FeedParsingFailed url -- Fill initial feed record initRec :: MonadIO m => (Label -> Error -> m ()) -> Manager -> FilePath -> FeedConfig -> m FeedRecord initRec logError manager logfile fc = do efeed <- liftIO $ fetch manager (fcUrl fc) let rec = FeedRecord { feedName = fcLabel fc , feedUrl = fcUrl fc , feedOn = fcActive fc , feedPrevIDs = [] , feedUpdated = Nothing , feedDebug = fcDebug fc , feedDebugLog = logfile } case efeed of Right feed -> let items = feedItems feed in return rec { feedPrevIDs = map itemID $ take maxNumIDs items , feedUpdated = listToMaybe items >>= itemTime } Left e -> logError (fcLabel fc) e >> return rec -- Execute a control command exec :: MonadIO m => (Label -> Error -> m ()) -> Manager -> (Label -> FilePath) -> Command -> State -> m State exec logError manager mklog command state@State { records = rs } = case command of AddFeed fc -> do rec <- initRec logError manager (mklog $ fcLabel fc) fc return state { records = rs ++ [rec] } RemoveFeed label -> return state { records = filter ((/= label) . feedName) rs } SetFeedActive label active -> let update rec = if feedName rec == label then rec { feedOn = active } else rec in return state { records = map update rs } SetInterval usec -> return state { usecInterval = usec } SetMaxPerVisit nitems -> return state { maxItemsPerVisit = nitems } SetFeedDebug label debug -> let update rec = if feedName rec == label then rec { feedDebug = debug } else rec in return state { records = map update rs } SetGeneralDebug debug -> return state { stDebug = debug } foldrM :: Monad m => (a -> b -> m b) -> b -> [a] -> m b foldrM _ v [] = return v foldrM f v (x:xs) = f x =<< foldrM f v xs makeLine :: Command -> String makeLine (AddFeed fc) = "Add feed " ++ fcLabel fc makeLine (RemoveFeed l) = "Remove feed " ++ l makeLine (SetFeedActive l True) = "Enable feed " ++ l makeLine (SetFeedActive l False) = "Disable feed " ++ l makeLine (SetInterval usec) = "Set interval to " ++ show usec ++ "usec" makeLine (SetMaxPerVisit n) = "Set max items to " ++ show n makeLine (SetFeedDebug l True) = "Enable debug for feed " ++ l makeLine (SetFeedDebug l False) = "Disable debug for feed " ++ l makeLine (SetGeneralDebug True) = "Enable general debug" makeLine (SetGeneralDebug False) = "Disable general debug" -- Execute all commands waiting in the queue execAll :: MonadIO m => (Label -> Error -> m ()) -> Manager -> (Label -> FilePath) -> CommandQueue -> State -> m (State, [String]) execAll logError manager mklog cq state = do cmds <- liftIO $ modifyMVar (cqMVar cq) $ \ l -> return ([], l) state' <- foldrM (exec logError manager mklog) state cmds return (state', map makeLine cmds) -- | Visit a feed, collect new items and return an updated feed record and the -- number of new collected items. Note that the feed's active setting is -- ignored: It will be visited even if marked as disabled. Therefore, check the -- setting before you call this function. visitFeed :: MonadIO m => Manager -- ^ HTTP connection manager. -> (Label -> Url -> Feed -> [Item] -> m ()) -- ^ Multi-item collector function. -> (Label -> Error -> m ()) -- ^ Error logging function. -> Int -- ^ Maximal number of new items to collect and remember for this visit. -- Remaining new items will be detected in the next visit. -> FeedRecord -- ^ Details of the feed to visit. -> m (FeedRecord, Int) visitFeed manager collectMany logError maxitems rec = do efeed <- liftIO $ fetch manager (feedUrl rec) case efeed of Right feed -> do let (items, rec', report) = detectNewItems maxitems rec feed ritems = reverse items when (feedDebug rec) $ liftIO report collectMany (feedName rec) (feedUrl rec) feed ritems return (rec', length ritems) Left e -> do logError (feedName rec) e return (rec, -1) -- | A single watcher loop iteration. runIteration :: MonadIO m => (Int -> FeedRecord -> m (FeedRecord, Int)) -> (State -> m (State, [String])) -> FilePath -> State -> m State runIteration visit execCmds logfile state = do liftIO $ threadDelay $ usecInterval state let (recsOn, recsOff) = partition feedOn $ records state pairs <- mapM (visit $ maxItemsPerVisit state) recsOn let recsOn' = map fst pairs stateCollected = state { records = recsOn' ++ recsOff } (stateExec, actions) <- execCmds stateCollected liftIO $ when (stDebug state) $ withFile logfile AppendMode $ \ h -> do let line = hPutStrLn h fline label s = line $ label ++ ": " ++ s nl = hPutChar h '\n' t <- getZonedTime line $ showTime t line $ "Feeds: " ++ show (length $ records stateExec) let info n | n == 0 = "No new items" | n == -1 = "Error" | otherwise = show n ++ " new items" mapM_ (\ (r, n) -> fline (feedName r) $ info n) pairs mapM_ (\ r -> fline (feedName r) "Disabled") recsOff line $ "Commands: " ++ show (length actions) mapM_ line actions nl return stateExec -- | Watch feeds and perform the given action on received feed items. run :: MonadIO m => WatcherConfig m -> m () run wc = do let mapCollect l u f = mapM_ $ (wcCollect wc) l u f collectMany = fromMaybe mapCollect $ wcCollectMany wc logError = wcLogError wc dc = wcDebug wc mklog = dcFeedLog dc manager <- liftIO $ newManager tlsManagerSettings let mkRecord fc = initRec logError manager (mklog $ fcLabel fc) fc initialRecords <- mapM mkRecord $ wcFeeds wc let initialState = State { usecInterval = fromInteger $ microseconds $ wcVisitInterval wc , maxItemsPerVisit = wcMaxItems wc , records = initialRecords , stDebug = dcDebugCycle dc } execCmds st = case wcCommandQueue wc of Just cq -> execAll logError manager mklog cq st Nothing -> return (st, []) visit = visitFeed manager collectMany logError iter = runIteration visit execCmds (dcGeneralLog dc) loop state = iter state >>= loop loop initialState -- | Create a new empty command queue. newCommandQueue :: IO CommandQueue newCommandQueue = liftM CommandQueue $ newMVar [] -- | Send a command into the queue. sendCommand :: CommandQueue -> Command -> IO () sendCommand cq cmd = modifyMVar_ (cqMVar cq) $ \ l -> return $ cmd : l -- | Send a series of commands into the queue. sendCommands :: CommandQueue -> [Command] -> IO () sendCommands cq cmds = modifyMVar_ (cqMVar cq) $ \ l -> return $ reverse cmds ++ l -- | Add a new feed to watch. addFeed :: FeedConfig -> Command addFeed = AddFeed -- | Remove a previously added feed. removeFeed :: Label -> Command removeFeed = RemoveFeed -- | Set whether a given feed should be watched (active) or not (inactive). setFeedActive :: Label -> Bool -> Command setFeedActive = SetFeedActive -- | Set the interval, in microseconds, between feed scans. setInterval :: TimeUnit t => t -> Command setInterval = SetInterval . fromInteger . toMicroseconds -- | Set the maximal number of feed items to be collected per feed per scan. If -- more new items are found, they wait for the next scan. setMaxPerVisit :: Int -> Command setMaxPerVisit = SetMaxPerVisit -- | Enable or disable debug logs for a given feed. setFeedDebug :: Label -> Bool -> Command setFeedDebug = SetFeedDebug -- | Enable or disable general (i.e. not feed-specific) debug logs. setGeneralDebug :: Bool -> Command setGeneralDebug = SetGeneralDebug