{-# LANGUAGE PatternGuards,BangPatterns #-} -------------------------------------------------------------------- -- | -- Module: Watch an RSS/Atom feed and write it to an IRC channel -- Copyright : (c) Don Stewart 2008-2009, Simon Michael 2009 -- License : BSD3 -- More info : rss2irc.cabal -- -------------------------------------------------------------------- import Control.Concurrent (forkIO,threadDelay) import Control.Concurrent.Chan (Chan,newChan,writeList2Chan,readChan,unGetChan) import Control.Concurrent.SampleVar (SampleVar,newSampleVar,writeSampleVar,readSampleVar) import Control.Monad (when,unless,forever) import Data.List (isPrefixOf,foldl',stripPrefix,intercalate) import Data.List.Split (splitOn) import Data.Either (either) import Data.Maybe (fromMaybe,fromJust,isJust,isNothing) import Data.Time.Clock (UTCTime,getCurrentTime,diffUTCTime) import Data.Time.Format (parseTime) import Data.Time.LocalTime (LocalTime,getCurrentTimeZone,utcToLocalTime) import Locale (defaultTimeLocale) import Network (PortID(PortNumber), connectTo) import Network.Browser (browse,request,setAllowRedirects,setOutHandler) import Network.HTTP (Response,getRequest,rspBody,rspCode) import Network.IRC (Message(Message),msg_command,msg_params,decode,encode,nick,user,joinChan,privmsg) -- part,quit) import Network.URI (URI(URI),uriScheme,parseURI) import qualified Control.Exception.Extensible as E (bracket,catch) import Control.Exception.Extensible (fromException) import Control.Parallel.Strategies (NFData) --(rnf),($|)) import System.Console.GetOpt (OptDescr(Option), ArgDescr(ReqArg,NoArg), ArgOrder(Permute), getOpt, usageInfo) import System.Environment (getArgs) import System.Exit (exitWith, ExitCode(ExitSuccess), exitFailure) import System.IO (Handle, BufferMode(NoBuffering),stdout,hSetBuffering,hFlush,hClose,hGetLine) import Text.Feed.Import (parseFeedString) import Text.Feed.Constructor (withItemDescription) import Text.Feed.Query (feedItems ,getItemTitle ,getItemLink ,getItemPublishDate ,getItemDate ,getItemAuthor ,getItemCommentLink ,getItemEnclosure ,getItemFeedLink ,getItemId ,getItemCategories ,getItemRights ,getItemSummary ,getItemDescription ) import Text.Feed.Types (Feed(XMLFeed),Item) import Text.Printf (printf,hPrintf) import Text.RegexPR (splitRegexPR,gsubRegexPR) -- import Debug.Trace -- strace :: Show a => a -> a -- strace a = trace (show a) a defaultport, defaultinterval, defaultidle, defaultmaxitems, maxmessagelength, maxtitlelength, maxdesclength, maxauthorlength, maxdatelength, maxlinklength :: Int defaultport = 6667 -- default irc port defaultinterval = 5 -- default polling interval in minutes defaultidle = 0 -- default required silent time before announcing defaultmaxitems = 5 -- default max items to announce per interval maxmessagelength = 400 -- max characters per irc message -- max field sizes. Max announcement length will be the sum of these -- plus typically 15 due to prettification plus any length increase -- due to --replace. The below should keep most announcements within -- maxmessagelength and all announcements within maxmessagelength * 2 or so. maxtitlelength = 100 maxdesclength = 300 maxauthorlength = 50 maxdatelength = 50 maxlinklength = 200 announcestrategies :: [String] -- how to detect announceable items: announcestrategies = ["topnew" -- new unseen items at the top (good for most feeds) ,"allnew" -- new unseen items anywhere (announces items of a feed newly added to a planet) ,"top" -- any items above the previous top item ] defaultannouncestrategy :: String defaultannouncestrategy = head announcestrategies options :: [OptDescr Opt] options = [Option ['p'] ["port"] (ReqArg Port "PORT") "irc server port (default 6667)" ,Option [] ["ident"] (ReqArg Ident "STR") "set the bot's identity string (useful for contact info)" ,Option [] ["delay"] (ReqArg Delay "N") "wait for N minutes before starting (helps avoid mass joins)" ,Option ['i'] ["interval"] (ReqArg Interval "N") ("polling interval in minutes (default "++(show defaultinterval)++")") ,Option [] ["idle"] (ReqArg Idle "N") ("announce only when channel has been idle N minutes (default "++(show defaultidle)++")") ,Option ['m'] ["max-items"] (ReqArg MaxItems "N") ("announce at most N items per polling interval (default "++(show defaultmaxitems)++")") ,Option ['r'] ["recent"] (ReqArg Recent "N") "announce up to N recent items at startup (default 0)" ,Option [] ["announce"] (ReqArg Announce $ intercalate "|" announcestrategies) ("which items to announce (default: "++defaultannouncestrategy++")") ,Option [] ["no-title"] (NoArg NoTitle) ("don't show title (title is announced by default, up to "++(show maxtitlelength)++" chars)") ,Option ['a'] ["author"] (NoArg Author) ("show author (up to "++(show maxauthorlength)++" chars)") ,Option ['d'] ["description"] (NoArg Description) ("show description (up to "++(show maxdesclength)++" chars)") ,Option ['l'] ["link"] (NoArg Link) ("show link URL (up to "++(show maxlinklength)++" chars)") ,Option ['t'] ["time"] (NoArg Time) ("show timestamp (up to "++(show maxdatelength)++" chars)") ,Option ['e'] ["email"] (NoArg Email) "show email addresses (stripped by default)" ,Option ['h'] ["html"] (NoArg Html) "show HTML tags and entities (stripped by default)" ,Option [] ["dupe-descriptions"] (NoArg DupeDescriptions) "show identical consecutive descriptions (elided by default)" ,Option [] ["replace"] (ReqArg Replace "\"OLD/NEW\"") "replace OLD with NEW (regexpr patterns)" ,Option ['n'] ["num-iterations"] (ReqArg NumIterations "N") "exit after N iterations" ,Option ['q'] ["quiet"] (NoArg Quiet) "silence normal console output" ,Option [] ["debug"] (NoArg Debug) "do not connect to irc (same as no irc argument)" ,Option [] ["debug-irc"] (NoArg DebugIrc) "show irc activity" ,Option [] ["debug-feed"] (NoArg DebugFeed) "show feed items and polling stats" ,Option [] ["debug-xml"] (NoArg DebugXml) "show feed content" ,Option [] ["debug-http"] (NoArg DebugHttp) "show feed fetching progress" ] help :: IO a help = do putStrLn "Usage: rss2irc FEEDURL [BOTNAME@IRCSERVER/#CHANNEL] [OPTS]" putStrLn "Options:" putStrLn (usageInfo "" options) exitWith ExitSuccess data Opt = Port {value::String} | Ident {value::String} | Delay {value::String} | Interval {value::String} | MaxItems {value::String} | Recent {value::String} | Announce {value::String} | Idle {value::String} | Author | Description | Time | Link | NoTitle | Email | Html | DupeDescriptions | Replace {value::String} | NumIterations {value::String} | Quiet | Debug | DebugHttp | DebugXml | DebugFeed | DebugIrc deriving (Eq, Show) data Bot = Bot { socket :: Handle , server :: String , port :: !Int , channel :: String , botnick :: String , botopts :: ![Opt] , outputqueue :: Chan String , lastmsgtime :: UTCTime } instance NFData Item main :: IO () main = do (opts, args, errs) <- getOpt Permute options `fmap` getArgs let delay = optIntValue Delay 0 opts interval = optIntValue Interval defaultinterval opts announcestrategy = optValue Announce defaultannouncestrategy opts errs' = errs ++ if (interval > 0 || (isJust $ numIterations opts)) then [] else ["Eh.. no."] ++ if announcestrategy `elem` announcestrategies then [] else ["--announce should be one of "++intercalate ", " announcestrategies] when (not . null $ errs') $ mapM_ putStrLn errs' >> help -- force early failure if there is a bad regexp. XXX Don't know how to catch this yet. seq (applyReplacements opts "") $ return () when (delay > 0) $ threadDelay $ delay * minutes q <- newChan t <- getCurrentTime (feed,bot) <- case args of [f,nsc] -> case map (splitOn "@") $ splitOn "/" $ maybe nsc id (stripPrefix "irc://" nsc) of [[n,s],[c]] -> return (f, Bot{ socket = stdout , server = s , port = optIntValue Port defaultport opts , channel = c , botnick = n , botopts = opts , outputqueue = q , lastmsgtime = t }) _ -> help [f] -> return (f, Bot{ socket = stdout , server = "" , port = 0 , channel = "" , botnick = "" , botopts = Debug:opts , outputqueue = q , lastmsgtime = t }) _ -> help -- XXX error handling needs review. run tries to handle some errors by -- restarting its threads, but does not reconnect the bot. Some errors -- might be caught but not propagated here, in which case the thread -- exits. Errors which do propagate here cause a program exit. E.bracket (connect bot) (disconnect) (\b -> run b feed `E.catch` exit) where exit e = case fromException e of Just ExitSuccess -> exitWith ExitSuccess _ -> getTimeStamp >>= \t -> printf "%s: rss2irc error: run died with: %s, exiting\n" t (show e) >> exitFailure -- | Connect to the irc server. connect :: Bot -> IO Bot connect b | Debug `elem` (botopts b) = do unless (Quiet `elem` botopts b) $ printf "Skipping IRC connection due to --debug\n" return b connect b@(Bot{server=s,port=p,channel=c,botnick=n,botopts=opts}) = do let ident = optValue Ident "rss2irc gateway" opts unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: %s connecting to %s, channel %s...\n" t n s c >> hFlush stdout h <- connectTo s (PortNumber $ fromIntegral p) hSetBuffering h NoBuffering let b' = b{socket=h} ircWrite b' $ encode $ nick n ircWrite b' $ encode $ user n "0" "*" ident (connected,err) <- ircWaitForServerResp b' -- some servers require this, eg quakenet if not connected then fail $ printf "rss2irc error: irc connection failed with %s\n" err else do ircWrite b' $ encode $ joinChan c unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: connected.\n" t >> hFlush stdout return b' -- | Given a connected bot, start various threads to poll and announce, -- restarting them if they fail. run :: Bot -> String -> IO () run b@(Bot{botopts=opts}) f = do -- XXX do errors in forked threads propagate to the bracket above ? I think not. forkIO $ forever $ do feedReaderThread b f `catch` \e -> (unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: rss2irc error: feed reader thread died with %s, restarting\n" t (show e) >> hFlush stdout) bv <- newSampleVar b -- XXX reconnect bot when these fail (?) -- forkIO $ forever $ do forkIO $ ircWriterThread bv 0 -- `E.catch` \e -> (unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: rss2irc error: irc writer thread died with %s, restarting\n" t (show $ fromException e) >> hFlush stdout) -- forever $ do ircResponderThread bv -- `E.catch` \e -> (unless (Quiet `elem` opts) $ getTimeStamp >>= \t -> printf "%s: rss2irc error: irc responder thread died with %s, restarting\n" t (show $ fromException e) >> hFlush stdout) -- | Disconnect from the irc server. disconnect :: Bot -> IO () disconnect b | Debug `elem` (botopts b) = return () disconnect b = hClose $ socket b -- feed stuff -- | Poll the feed every interval minutes and send announceable items -- to the announcer thread. -- -- Some smartness is needed to be robust with real-world feeds, which -- may have jitter due to http caching, unreliable feed order, -- unpredictable or missing item dates, etc. The most robust approach -- seems to be: assume that feeds provide items sorted newest -- first. Then, announceable items are the new (newer pub date than -- the last announced item) and unseen (id not among the last N ids -- seen since startup) items which appear at the top of the feed. This -- is the default "topnew" strategy. We support some other strategies -- which may be useful in some cases: "allnew" (announce new unseen -- items appearing anywhere in the feed) and "top" (announce items -- appearing about the previous top item, new or not). -- feedReaderThread :: Bot -> String -> IO () feedReaderThread b@(Bot{botopts=opts,outputqueue=q}) url = do when (iterations == Just 0) $ exitIterations unless (Quiet `elem` opts) $ printf "Polling %s every %s\n" url m >> hFlush stdout (is,polls,failedpolls) <- pollUntilFetchItems url delay opts 0 0 -- go no further until we have baseline items let seen = map itemId is recent = take (optIntValue Recent 0 opts) is announceable = reverse recent announceable' = (if DupeDescriptions `elem` opts then id else elideDuplicateDescriptions) announceable lastpubdate = if null announceable' then Nothing else getItemPublishDate $ last announceable' numannounced = fromIntegral $ length announceable' when (DebugFeed `elem` opts) $ do getTimeStamp >>= printf ("\n%s: polled " ++ url ++ "\n") printItemDetails "feed items, in feed order" is printItemDetails "announceable items, oldest first" announceable printf "total polls, failed polls, items announced: %10d %10d %10d\n" polls failedpolls numannounced hFlush stdout writeList2Chan q $ map (announcement b) announceable' go seen lastpubdate iterations numannounced polls failedpolls where go :: [String] -> Maybe String -> Maybe Int -> Integer -> Integer -> Integer -> IO () go !seen !lastpubdate !iterationsleft !numannounced !polls !failedpolls = do when (iterationsleft == Just 0) $ exitIterations threadDelay delay when (DebugFeed `elem` opts) $ do getTimeStamp >>= \t -> printf "\n%s: polling %s\n" t url fetched <- fetchItems url opts let polls' = polls + 1 failedpolls' | isLeft fetched = failedpolls+1 | otherwise = failedpolls fetched' = either (const []) id fetched announcestrategy = optValue Announce defaultannouncestrategy opts isprevioustop = ((== head seen).itemId) isunseen = ((`notElem` seen).itemId) isnew = (`notOlderThan` lastpubdate) isnewunseen i = isnew i && isunseen i new = case announcestrategy of "top" -> takeWhile (not.isprevioustop) fetched' "allnew" -> filter isnewunseen fetched' _ -> takeWhile isnewunseen fetched' -- "topnew" seen' = take windowsize $ (map itemId new) ++ seen announceable = reverse new announceable' = (if DupeDescriptions `elem` opts then id else elideDuplicateDescriptions) announceable lastpubdate' = if null announceable' then lastpubdate else getItemPublishDate $ last announceable' numannounced' = numannounced + (fromIntegral $ length announceable') iterationsleft' = maybe Nothing (Just . pred) iterationsleft when (DebugFeed `elem` opts) $ do printItemDetails "feed items, in feed order" fetched' printItemDetails "announceable items, oldest first" announceable printf "total polls, failed polls, items announced: %10d %10d %10d\n" polls' failedpolls' numannounced' hFlush stdout writeList2Chan q $ map (announcement b) announceable' go seen' lastpubdate' iterationsleft' numannounced' polls' failedpolls' iterations = numIterations opts exitIterations = unless (Quiet `elem` opts) (printf "Exiting after %d iterations.\n" (fromJust iterations)) >> exitWith ExitSuccess interval = optIntValue Interval defaultinterval opts m = if interval==1 then "minute" else show interval ++ " minutes" delay = interval * minutes windowsize = 200 -- | Check if an item's publish date is older than another date. -- Either date may be Nothing, a parseable date string or unparseable. -- In the (likely) event we can't parse two dates, return True. notOlderThan :: Item -> Maybe String -> Bool notOlderThan _ Nothing = True notOlderThan i (Just s1) = case getItemPublishDate i of Nothing -> True Just s2 -> case (parseDateTime s1, parseDateTime s2) of (Just d1, Just d2) -> d2 >= d1 _ -> True -- | Elide any identical consecutive item descriptions. elideDuplicateDescriptions :: [Item] -> [Item] elideDuplicateDescriptions = elidedupes Nothing where elidedupes :: Maybe String -> [Item] -> [Item] elidedupes _ [] = [] elidedupes (Just lastdesc) (i:is) | getItemDescription i==Just lastdesc = [withItemDescription ditto i] ++ elidedupes (Just lastdesc) is where ditto = "''" -- or http://en.wikipedia.org/wiki/Ditto_mark : 〃 elidedupes _ (i:is) = [i] ++ elidedupes (getItemDescription i) is -- | Get the best available unique identifier for a feed item. itemId :: Item -> String itemId i = case getItemId i of Just (_,s) -> s Nothing -> case getItemTitle i of Just s -> s Nothing -> case getItemDate i of Just s -> s Nothing -> show i -- | Like fetchItems, but if it fails with a transient error, keep -- retrying at the specified interval. Returns a tuple of items, poll -- and failed poll counts, or throws an IO error. pollUntilFetchItems :: String -> Int -> [Opt] -> Integer -> Integer -> IO ([Item],Integer,Integer) pollUntilFetchItems url delay opts polls failedpolls = do is <- fetchItems url opts case is of Right is' -> return (is',polls+1,failedpolls) Left _ -> do threadDelay delay pollUntilFetchItems url delay opts (polls+1) (failedpolls+1) -- | Get the items from the feed at the specified url, with redirects -- and authentication allowed, or an error string, or throw an IO -- error if the error looks permanent. fetchItems :: String -> [Opt] -> IO (Either String [Item]) fetchItems url opts = either Left (Right . feedItems) `fmap` readFeed url opts -- | Fetch a feed, with redirects and authentication allowed, or an error string, -- or throw an IO error if the error looks permanent. Also show the raw content -- as debug output if that option is in effect. readFeed :: String -> [Opt] -> IO (Either String Feed) readFeed url opts = do s <- readUri url opts case s of Left e -> return $ Left e Right s' -> do when (DebugXml `elem` opts) $ do getTimeStamp >>= \t -> printf "\n%s: feed content:\n%s\n" t s' case parseFeedString s' of Nothing -> noparse Just (XMLFeed _) -> noparse Just f -> return $ Right f where noparse = return $ Left "could not parse feed" -- | Fetch the contents of a uri, with redirects and authentication allowed, or an error string, -- or throw an IO error if the error looks permanent. Also show the http transaction progress -- as debug output if that option is in effect. "file:..." uris are also allowed. readUri :: String -> [Opt] -> IO (Either String String) readUri uri opts = case parseURI uri of Just URI{uriScheme="file:"} -> do Right `fmap` readFile (drop 5 uri) `catch` \e -> return $ Left $ show e _ -> do (_uri',rsp) <- browse $ do if (DebugHttp `elem` opts) then setOutHandler (\s -> getTimeStamp >>= \t -> printf "\n%s: %s\n" t s) else setOutHandler (const $ return ()) setAllowRedirects True request $ getRequest uri case rspCode rsp of (2,_,_) -> return $ Right $ rspBody rsp code -> do getTimeStamp >>= \t -> printf "%s: rss2irc error fetching %s: %s\n" t uri (show code) >> hFlush stdout return $ Left $ show code -- | Dump item details to the console for debugging. printItemDetails :: String -> [Item] -> IO () printItemDetails hdr is = printf "%s: %d\n%s" hdr count items >> hFlush stdout where items = unlines [printf " %-29s%s %-*s" d p twidth t | (d,p,t,_) <- fields] twidth = maximum $ map (length.fromMaybe "".getItemTitle) is -- subhdr = "(date, (publish date if different), title)\n" -- subhdr' = if null is then "" else subhdr count = length is fields = [(d, if p==d then "" else printf " pubdate:%-29s" p, t, i) | item <- is ,let d = fromMaybe "" $ getItemDate item ,let p = fromMaybe "" $ getItemPublishDate item ,let t = fromMaybe "" $ getItemTitle item ,let i = maybe "" show $ getItemId item ] -- deriving instance Eq Item instance Eq Item where (==) a b = let match f = f a == f b in all match [getItemTitle ,getItemLink ,getItemPublishDate ,getItemDate ,getItemAuthor ,getItemCommentLink ,getItemFeedLink ,getItemRights ,getItemSummary ,getItemDescription ] && match getItemCategories && match getItemEnclosure && match getItemId -- | Convert a feed item to a string for the bot to announce on irc. -- The announcement is likely but not guaranteed to fit within a -- single irc message. announcement:: Bot -> Item -> String announcement (Bot{botopts=opts}) i = applyReplacements opts $ printf "%s%s%s%s%s" title desc author date link where title = if elem NoTitle opts then "" else maybe "" (truncateWordsAt maxtitlelength "..." . clean) (getItemTitle i) desc = ifopt Description $ maybe "" ((" - "++) . truncateWordsAt maxdesclength "..." . clean) (getItemDescription i) author = ifopt Author $ maybe "" ((" "++) . parenthesise . truncateWordsAt maxauthorlength "..." . clean) (getItemAuthor i) date = ifopt Time $ maybe "" ((" "++) . truncateAt maxdatelength "..." . clean) (getItemDate i) link = ifopt Link $ maybe "" ((" "++) . truncateAt maxlinklength "..." . clean) (getItemLink i) clean = oneline . trimwhitespace . striphtml . stripemail ifopt o = if elem o opts then id else const "" oneline = intercalate " " . map strip . lines -- two spaces to hint at newlines & brs trimwhitespace = gsubRegexPR "[ \t][ \t]+" " " striphtml = if elem Html opts then id else stripHtml . brtonewline brtonewline = gsubRegexPR "(<|<) *br */?(>|>)" "\n" stripemail = if elem Email opts then id else stripEmails parenthesise = (++")").("("++) -- irc stuff -- | Wait for server connection confirmation. ircWaitForServerResp :: Bot -> IO (Bool,String) ircWaitForServerResp b@(Bot{socket=h,botopts=opts}) = do if (Debug `elem` opts) then return (True,"") else do s <- hGetLine h when (DebugIrc `elem` opts) $ getTimeStamp >>= \t -> printf "%s: <-%s\n" t s >> hFlush stdout if isping s then pong b s >> ircWaitForServerResp b else do if isResponseOK s then return (True, s) else if isNotice s then ircWaitForServerResp b else return (False, s) where parseRespCode x = if length (words x) > 1 then (words x) !! 1 else "000" isResponseOK x = (parseRespCode x) `elem` [ "001", "002", "003", "004" ] isNotice x = (head $ parseRespCode x) `elem` ('0':['a'..'z']++['A'..'Z']) -- | Print announcements appearing in the bot's announce queue to its irc channel, -- complying with bot and irc server policies. ircWriterThread :: SampleVar Bot -> Int -> IO () ircWriterThread bv batchindex = do b'@(Bot{outputqueue=q,botopts=opts}) <- readSampleVar bv writeSampleVar bv b' ann <- readChan q -- policy: -- if specified, wait for --idle minutes of silence before sending messages -- no more than 400 chars per message -- no more than one message per 2s -- XXX on freenode, 6 such messages still cause a flood. Try limiting chars-per-period, or do a ping-pong -- no more than --max-items items per polling interval -- ditto for messages, except a final multi-message item will be completed. -- reread the samplevar to get an accurate idle time b <- readSampleVar bv writeSampleVar bv b idle <- channelIdleTime b let maxitems = optIntValue MaxItems defaultmaxitems opts requiredidle = optIntValue Idle defaultidle opts -- minutes pollinterval = optIntValue Interval defaultinterval opts -- minutes idleinterval = max (requiredidle-idle) 1 -- minutes sendinterval = if Debug `elem` opts then 0 else 2 -- seconds iscontinuation = continuationprefix `isPrefixOf` ann act | batchindex >= maxitems && not iscontinuation = (do when (DebugIrc `elem` opts) $ getTimeStamp >>= \t -> printf "%s: sent %d messages in this batch, max is %d, sleeping for %dm\n" t batchindex maxitems pollinterval >> hFlush stdout threadDelay $ pollinterval * minutes ircWriterThread bv 0) | idle < requiredidle = (do when (DebugIrc `elem` opts) $ getTimeStamp >>= \t -> printf "%s: channel has been idle %dm, %dm required, sleeping for %dm\n" t idle requiredidle idleinterval >> hFlush stdout unGetChan q ann threadDelay $ idleinterval * minutes ircWriterThread bv batchindex) | otherwise = (do when (DebugIrc `elem` opts) $ getTimeStamp >>= \t -> printf "%s: sent %d messages in this batch and channel has been idle %dm, sending next\n" t batchindex idle >> hFlush stdout let (msg,rest) = splitAnnouncement ann unless (null rest) $ unGetChan q rest ircPrivmsgH b msg threadDelay $ sendinterval * seconds ircWriterThread bv (batchindex+1)) act -- | The time in minutes since the last message on this bot's channel, or -- otherwise since joining the channel. Leap seconds are ignored. channelIdleTime :: Bot -> IO Int channelIdleTime (Bot{lastmsgtime=t1}) = do t <- getCurrentTime return $ round (diffUTCTime t t1) `div` 60 -- | Handle any incoming commands from the bot's irc channel. -- The following commands are supported: PING. -- Also track the last message time. ircResponderThread :: SampleVar Bot -> IO () ircResponderThread bv = do b@(Bot{socket=h,botopts=opts}) <- readSampleVar bv writeSampleVar bv b if (Debug `elem` opts) then threadDelay $ 1 * hours else do s <- hGetLine h let s' = init s when (DebugIrc `elem` opts) $ (getTimeStamp >>= \t -> printf "%s: <-%s\n" t s') >> hFlush stdout let respond | ismessage s = do t <- getCurrentTime writeSampleVar bv b{lastmsgtime=t} | isping s = pong b s' | otherwise = return () respond ircResponderThread bv ismessage :: String -> Bool ismessage s = isprivmsg s && not ("VERSION" `elem` (msg_params $ fromJust $ decode s)) isprivmsg :: String -> Bool isprivmsg s = case decode s of Just Message{msg_command="PRIVMSG"} -> True _ -> False isping :: String -> Bool isping s = case decode s of Just Message{msg_command="PING"} -> True _ -> False pong :: Bot -> String -> IO () pong b x = ircWrite b $ printf "PONG :%s" (drop 6 x) -- | Send a privmsg to the bot's irc server & channel. ircPrivmsgH :: Bot -> String -> IO () ircPrivmsgH b@(Bot{channel=c}) msg = ircWrite b $ encode $ privmsg c msg -- | Send a string to the bot's irc server unless --debug is in effect, -- and to the console if --debug-irc is in effect. ircWrite :: Bot -> String -> IO () ircWrite (Bot{socket=h,botopts=opts}) s = do when (not $ Debug `elem` opts) $ hPrintf h (s++"\r\n") debugoutput where debugoutput | DebugIrc `elem` opts = getTimeStamp >>= \t -> printf "%s: ->%s\n" t s >> hFlush stdout | not (Quiet `elem` opts) && ("PRIVMSG" `isPrefixOf` s) = (printf "%s\n" $ drop 1 $ dropWhile (/=':') s) >> hFlush stdout | otherwise = return () -- utils -- | Split an announcement into one or more suitably truncated and -- formatted irc messages. Each call returns the next message and -- the remainder of the announcement. -- XXX n must be > length continuationsuffix splitAnnouncement :: String -> (String,String) splitAnnouncement a | length a <= maxmessagelength = (a,"") | otherwise = case splitAtWordBefore n a of (m,rest@(_:_)) -> (m++continuationsuffix, continuationprefix++rest) (m,"") -> (m, "") where n = maxmessagelength - length continuationsuffix continuationprefix, continuationsuffix :: String continuationprefix = "... " continuationsuffix = " ..." -- | Truncate a string, if possible at a word boundary, at or before -- the specified position, and indicate truncation with the specified -- suffix. The length of the returned string will be in the range -- n, n+length suffix. truncateWordsAt :: Int -> String -> String -> String truncateWordsAt n suffix s | s' == s = s | otherwise = s' ++ suffix where s' = fst $ splitAtWordBefore n s -- | Truncate a string at the specified position, and indicate -- truncation with the specified suffix. The length of the returned -- string will be in the range n, n+length suffix. truncateAt :: Int -> String -> String -> String truncateAt n suffix s | s' == s = s | otherwise = s' ++ suffix where s' = take n s -- | Split a string at or before the specified position, on a word boundary if possible. splitAtWordBefore :: Int -> String -> (String,String) splitAtWordBefore n s | null a || (null b) = (rstrip a, lstrip b) | last a == ' ' || (head b == ' ') || (not $ ' ' `elem` a) = (rstrip a, lstrip b) | otherwise = (rstrip $ take (length a - length partialword) a, partialword ++ lstrip b) where (a,b) = splitAt n s partialword = reverse $ takeWhile (/= ' ') $ reverse a -- | Apply all --replace substitutions to a string, in turn. -- Warning, will fail at runtime if there is a bad regexp. applyReplacements :: [Opt] -> String -> String applyReplacements opts = foldl' (.) id (reverse substitutions) where substitutions = map make $ optValues Replace opts make s = case splitRegexPR "(? gsubRegexPR pat sub _ -> id -- | Replace any HTML tags or entities in a string with a single space. stripHtml :: String -> String stripHtml = gsubRegexPR "(&[^ \t]*?;|<.*?>)" " " -- | Remove any email addresses from a string. stripEmails :: String -> String stripEmails = gsubRegexPR "(?i) ?(<|<)?\\b[-._%+a-z0-9]+@[-.a-z0-9]+\\.[a-z]{2,4}\\b(>|>)?" "" optValue :: (String -> Opt) -> String -> [Opt] -> String optValue oc def = head . optValues oc . (++[oc def]) optIntValue :: (String -> Opt) -> Int -> [Opt] -> Int optIntValue oc def opts = fromMaybe def $ maybeRead $ optValue oc "" opts optValues :: (String -> Opt) -> [Opt] -> [String] optValues oc opts = concatMap getval opts where getval o = if oc v == o then [v] else [] where v = value o numIterations :: [Opt] -> Maybe Int numIterations opts = case optIntValue NumIterations (-1) opts of (-1) -> Nothing n -> Just n maybeRead :: Read a => String -> Maybe a maybeRead s = case reads s of [(x, _)] -> Just x _ -> Nothing -- | Parse a datetime string if possible, trying at least the formats -- likely to be used in RSS/Atom feeds. parseDateTime :: String -> Maybe UTCTime parseDateTime s = firstJust [parseTime defaultTimeLocale f s' | f <- formats] where s' = adaptForParseTime s adaptForParseTime = gsubRegexPR "(....-..-..T..:..:..[\\+\\-]..):(..)" "\\1\\2" -- 2009-09-22T13:10:56+00:00 formats = -- http://hackage.haskell.org/packages/archive/time/1.1.4/doc/html/Data-Time-Format.html#v%3AformatTime [ "%a, %d %b %Y %T %z" -- Fri, 18 Sep 2009 12:42:07 -0400 ,"%a, %d %b %Y %T %Z" -- Fri, 25 Sep 2009 11:01:23 UTC ,"%Y-%m-%dT%T%z" -- 2009-09-22T13:10:56+0000 ] firstJust :: [Maybe a] -> Maybe a firstJust ms = case dropWhile isNothing ms of (m:_) -> m _ -> Nothing getCurrentLocalTime :: IO LocalTime getCurrentLocalTime = do t <- getCurrentTime tz <- getCurrentTimeZone return $ utcToLocalTime tz t getTimeStamp :: IO String getTimeStamp = do t <- getCurrentLocalTime tz <- getCurrentTimeZone return $ printf "%s %s" (take 19 $ show t) (show tz) isLeft :: Either a b -> Bool isLeft (Left _) = True isLeft _ = False -- strict :: NFData a => a -> a -- strict = id $| rnf hours, minutes, seconds :: Int hours = 60 * minutes minutes = 60 * seconds seconds = 10^(6::Int) strip, lstrip, rstrip, dropws :: String -> String strip = lstrip . rstrip lstrip = dropws rstrip = reverse . dropws . reverse dropws = dropWhile (`elem` " \t")