--
-- | Fetch URL page titles of HTML links.
--
module Plugin.Url (theModule) where
import Plugin
PLUGIN Url
instance Module UrlModule Bool where
moduleHelp _ "url-title" = "url-title . Fetch the page title."
moduleCmds _ = ["url-title"]
modulePrivs _ = ["url-on", "url-off"]
moduleDefState _ = return True -- url on
process_ _ "url-title" url = fetchTitle url
process_ _ "url-on" _ = writeMS True >> return ["Url enabled"]
process_ _ "url-off" _ = writeMS False >> return ["Url disabled"]
contextual _ _ _ text = do
alive <- readMS
if alive && (not $ areSubstringsOf ignoredStrings text)
then case containsUrl text of
Nothing -> return []
Just url -> fetchTitle url
else return []
------------------------------------------------------------------------
-- | Fetch the title of the specified URL.
fetchTitle :: String -> LB [String]
fetchTitle url = do
title <- io $ urlPageTitle url (proxy config)
return $ maybe [] return title
-- | List of strings that, if present in a contextual message, will
-- prevent the looking up of titles. This list can be used to stop
-- responses to lisppaste for example. Another important use is to
-- another lambdabot looking up a url title that contains another
-- url in it (infinite loop). Ideally, this list could be added to
-- by an admin via a privileged command (TODO).
ignoredStrings :: [String]
ignoredStrings =
["paste", -- Ignore lisppaste, rafb.net
"cpp.sourcforge.net", -- C++ paste bin
"HaskellIrcPastePage", -- Ignore paste page
"title of that page", -- Ignore others like the old me
urlTitlePrompt] -- Ignore others like me
-- | Suffixes that should be stripped off when identifying URLs in
-- contextual messages. These strings may be punctuation in the
-- current sentence vs part of a URL. Included here is the NUL
-- character as well.
ignoredUrlSuffixes :: [String]
ignoredUrlSuffixes = [".", ",", ";", ")", "\"", "\1"]
-- | Searches a string for an embeddded URL and returns it.
containsUrl :: String -> Maybe String
containsUrl text = do
(_,kind,rest,_) <- matchRegexAll begreg text
let url = takeWhile (/=' ') rest
return $ stripSuffixes ignoredUrlSuffixes $ kind ++ url
where
begreg = mkRegexWithOpts "https?://" True False
-- | Utility function to remove potential suffixes from a string.
-- Note, once a suffix is found, it is stripped and returned, no other
-- suffixes are searched for at that point.
stripSuffixes :: [String] -> String -> String
stripSuffixes [] str = str
stripSuffixes (s:ss) str
| isSuffixOf s str = take (length str - length s) $ str
| otherwise = stripSuffixes ss str
-- | Utility function to check of any of the Strings in the specified
-- list are substrings of the String.
areSubstringsOf :: [String] -> String -> Bool
areSubstringsOf = flip (any . flip isSubstringOf)
where
isSubstringOf s str = any (isPrefixOf s) (tails str)