{-# LANGUAGE PatternGuards, BangPatterns, DeriveDataTypeable, OverloadedStrings, ScopedTypeVariables #-} {- | Common utilities. Copyright (c) Don Stewart 2008-2009, Simon Michael 2009-2014 License: BSD3. -} module Utils ( module Utils, module Debug.Trace ) where import Codec.Binary.UTF8.String as UTF8 (decodeString, encodeString, isUTF8Encoded) import Control.Concurrent import Control.Monad import Data.List import Data.Maybe import Data.Time.Clock (UTCTime,getCurrentTime) import Data.Time.Format (parseTime) import Data.Time.LocalTime (LocalTime,getCurrentTimeZone,utcToLocalTime) import Prelude hiding (log) import System.Info import System.IO (stdout,hFlush) import System.Locale (defaultTimeLocale) import Text.Feed.Query import Text.Feed.Types (Item) import Text.ParserCombinators.Parsec hiding (label) import Text.Printf (printf) import Text.RegexPR (splitRegexPR,gsubRegexPR) import Base import Debug.Trace -- | trace a showable expression strace :: Show a => a -> a strace a = trace (show a) a -- | labelled trace - like strace, with a label prepended ltrace :: Show a => String -> a -> a ltrace l a = trace (l ++ ": " ++ show a) a -- | monadic trace - like strace, but works as a standalone line in a monad mtrace :: (Monad m, Show a) => a -> m a mtrace a = strace a `seq` return a -- | trace an expression using a custom show function tracewith :: (a -> String) -> a -> a tracewith f e = trace (f e) e -- Light abstraction layer for thread-safe mutable data type Shared a = SampleVar a newSharedVar :: a -> IO (SampleVar a) newSharedVar = newSampleVar getSharedVar :: SampleVar a -> IO a getSharedVar v = do x <- readSampleVar v writeSampleVar v x return x putSharedVar :: SampleVar a -> a -> IO () putSharedVar v x = writeSampleVar v x -- Option parsing helpers ircAddressFromOpts :: Opts -> Maybe IrcAddress ircAddressFromOpts Opts{irc_address=""} = Nothing ircAddressFromOpts Opts{irc_address=a} = Just $ parseIrcAddress a parseIrcAddress :: String -> IrcAddress parseIrcAddress a = either (\e -> opterror $ printf "could not parse IRC address \"%s\"\n%s\n" a (show e)) id $ parse ircaddrp "" a where ircaddrp = choice' $ [ do -- pre 0.5 syntax: [irc://]NICK@IRCSERVER[:PORT]/[#]CHANNEL optional $ choice' $ map string ["irc://", "irc:"] n <- many1 $ noneOf "@" char '@' s <- many1 $ noneOf ":/" p <- optionMaybe $ char ':' >> read `fmap` many1 digit >>= return char '/' optional $ char '#' c <- many1 $ noneOf "/ \t" eof return $ IrcAddress s p ('#':c) n , do -- new easier syntax: [irc://]IRCSERVER[:PORT]/[#]CHANNEL/NICK optional $ choice' $ map string ["irc://", "irc:"] s <- many1 $ noneOf ":/" p <- optionMaybe $ char ':' >> read `fmap` many1 digit >>= return char '/' optional $ char '#' c <- many1 $ noneOf "/" char '/' n <- many1 $ noneOf "/ \t" eof return $ IrcAddress s p ('#':c) n ] -- | A version of error' that suggests --help. opterror :: String -> a opterror = error' . (++ " (see --help for usage)") -- | A version of error that's better at displaying unicode. error' :: String -> a error' = error . toPlatformString -- | Convert a feed item to a string for the bot to announce on irc. -- The announcement is likely but not guaranteed to fit within a -- single irc message. toAnnouncement:: Opts -> Item -> String toAnnouncement opts i = applyReplacements opts $ printf "%s%s%s%s%s" title desc author' date link' where title = unlessopt no_title $ maybe "" (truncateWordsAt maxtitlelength "..." . clean) (getItemTitle i) desc = ifopt description $ maybe "" ((" - "++) . truncateWordsAt maxdesclength "..." . clean) (getItemDescription i) author' = ifopt author $ maybe "" ((" "++) . parenthesise . truncateWordsAt maxauthorlength "..." . clean) (getItemAuthor i) date = ifopt time $ maybe "" ((" "++) . truncateAt maxdatelength "..." . clean) (getItemDate i) link' = ifopt link_ $ maybe "" ((" "++) . truncateAt maxlinklength "..." . clean) (getItemLink i) clean = oneline . trimwhitespace . striphtml . stripemail ifopt o = if o opts then id else const "" unlessopt o = if not $ o opts then id else const "" oneline = intercalate " " . map strip . lines -- two spaces to hint at newlines & brs trimwhitespace = gsubRegexPR "[ \t][ \t]+" " " striphtml = if html opts then id else stripHtml . brtonewline brtonewline = gsubRegexPR "(<|<) *br */?(>|>)" "\n" stripemail = if email opts then id else stripEmails parenthesise = (++")").("("++) -- | Split an announcement into one or more suitably truncated and -- formatted irc messages. Each call returns the next message and -- the remainder of the announcement. -- XXX n must be > length continuationsuffix splitAnnouncement :: String -> (String,String) splitAnnouncement a | length a <= maxmessagelength = (a,"") | otherwise = case splitAtWordBefore n a of (m,rest@(_:_)) -> (m++continuationsuffix, continuationprefix++rest) (m,"") -> (m, "") where n = maxmessagelength - length continuationsuffix continuationprefix, continuationsuffix :: String continuationprefix = "... " continuationsuffix = " ..." -- | Truncate a string, if possible at a word boundary, at or before -- the specified position, and indicate truncation with the specified -- suffix. The length of the returned string will be in the range -- n, n+length suffix. truncateWordsAt :: Int -> String -> String -> String truncateWordsAt n suffix s | s' == s = s | otherwise = s' ++ suffix where s' = fst $ splitAtWordBefore n s -- | Truncate a string at the specified position, and indicate -- truncation with the specified suffix. The length of the returned -- string will be in the range n, n+length suffix. truncateAt :: Int -> String -> String -> String truncateAt n suffix s | s' == s = s | otherwise = s' ++ suffix where s' = take n s -- | Split a string at or before the specified position, on a word boundary if possible. splitAtWordBefore :: Int -> String -> (String,String) splitAtWordBefore n s | null a || (null b) = (rstrip a, lstrip b) | last a == ' ' || (head b == ' ') || (not $ ' ' `elem` a) = (rstrip a, lstrip b) | otherwise = (rstrip $ take (length a - length partialword) a, partialword ++ lstrip b) where (a,b) = splitAt n s partialword = reverse $ takeWhile (/= ' ') $ reverse a -- | Apply all --replace substitutions to a string, in turn. -- Warning, will fail at runtime if there is a bad regexp. applyReplacements :: Opts -> String -> String applyReplacements opts = foldl' (.) id (reverse substitutions) where substitutions = map replaceOptToSubst $ replace opts replaceOptToSubst s = case splitRegexPR "(? gsubRegexPR pat sub _ -> id -- | Replace any HTML tags or entities in a string with a single space. stripHtml :: String -> String stripHtml = gsubRegexPR "(&[^ \t]*?;|<.*?>)" " " -- | Remove any email addresses from a string. stripEmails :: String -> String stripEmails = gsubRegexPR "(?i) ?(<|<)?\\b[-._%+a-z0-9]+@[-.a-z0-9]+\\.[a-z]{2,4}\\b(>|>)?" "" maybeRead :: Read a => String -> Maybe a maybeRead s = case reads s of [(x, _)] -> Just x _ -> Nothing decrementMaybe :: Enum a => Maybe a -> Maybe a decrementMaybe = maybe Nothing (Just . pred) -- | Parse a datetime string if possible, trying at least the formats -- likely to be used in RSS/Atom feeds. parseDateTime :: String -> Maybe UTCTime parseDateTime s = firstJust [parseTime defaultTimeLocale f s' | f <- formats] where s' = adaptForParseTime s adaptForParseTime = gsubRegexPR "(....-..-..T..:..:..[\\+\\-]..):(..)" "\\1\\2" -- 2009-09-22T13:10:56+00:00 formats = -- http://hackage.haskell.org/packages/archive/time/1.1.4/doc/html/Data-Time-Format.html#v%3AformatTime [ "%a, %d %b %Y %T %z" -- Fri, 18 Sep 2009 12:42:07 -0400 ,"%a, %d %b %Y %T %Z" -- Fri, 25 Sep 2009 11:01:23 UTC ,"%Y-%m-%dT%T%z" -- 2009-09-22T13:10:56+0000 ] firstJust :: [Maybe a] -> Maybe a firstJust ms = case dropWhile isNothing ms of (m:_) -> m _ -> Nothing -- | Grammatically correct "every N minutes". everyMinutesString :: Int -> String everyMinutesString 1 = "every minute" everyMinutesString i = "every " ++ show i ++ " minutes" -- | Grammatically correct "in N minutes". inMinutesString :: Int -> String inMinutesString 1 = "in 1 minute" inMinutesString i = "in " ++ show i ++ " minutes" -- | Log some text to the console with a timestamp. log :: String -> IO () log s = do t <- getTimeStamp putStrLn $ printf "%s: %s" t s hFlush stdout -- | Decorate some multi-line text with a label and start/end separators. labelledText :: String -> String -> String labelledText label s = printf "========== %s:\n%s\n=============================================\n" label s getCurrentLocalTime :: IO LocalTime getCurrentLocalTime = do t <- getCurrentTime tz <- getCurrentTimeZone return $ utcToLocalTime tz t getTimeStamp :: IO String getTimeStamp = do t <- getCurrentLocalTime tz <- getCurrentTimeZone return $ printf "%s %s" (take 19 $ show t) (show tz) hours, minutes, seconds :: Int hours = 60 * minutes minutes = 60 * seconds seconds = 10^(6::Int) strip, lstrip, rstrip, dropws :: String -> String strip = lstrip . rstrip lstrip = dropws rstrip = reverse . dropws . reverse dropws = dropWhile (`elem` " \t") chomp :: String -> String chomp = reverse . dropWhile (`elem` "\n\r") . reverse isLeft :: Either a b -> Bool isLeft (Left _) = True isLeft _ = False -- platform strings -- | A platform string is a string value from or for the operating system, -- such as a file path or command-line argument (or environment variable's -- name or value ?). On some platforms (such as unix) these are not real -- unicode strings but have some encoding such as UTF-8. This alias does -- no type enforcement but aids code clarity. type PlatformString = String -- | Convert a possibly encoded platform string to a real unicode string. -- We decode the UTF-8 encoding recommended for unix systems -- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) -- and leave anything else unchanged. fromPlatformString :: PlatformString -> String fromPlatformString s = if UTF8.isUTF8Encoded s then UTF8.decodeString s else s -- | Convert a unicode string to a possibly encoded platform string. -- On unix we encode with the recommended UTF-8 -- (cf http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html) -- and elsewhere we leave it unchanged. toPlatformString :: String -> PlatformString toPlatformString = case os of "unix" -> UTF8.encodeString "linux" -> UTF8.encodeString "darwin" -> UTF8.encodeString _ -> id -- | Backtracking choice, use this when alternatives share a prefix. -- Consumes no input if all choices fail. choice' :: [GenParser tok st a] -> GenParser tok st a choice' = choice . map Text.ParserCombinators.Parsec.try