module Network.URL.Archiver (checkArchive) where
import Control.Monad (when)
import Data.Char (isAlphaNum, isAscii)
import Data.List (isPrefixOf)
import Data.Maybe (fromJust)
import Network.Browser (browse, formToRequest, request, Form(..))
import Network.HTTP (getRequest, rspBody, simpleHTTP, RequestMethod(POST))
import Network.URI (isURI, parseURI, uriPath)
import System.Random (getStdGen, randomR)
import Text.Printf (printf)
openURL = simpleHTTP . getRequest
checkArchive :: String
-> String
-> IO ()
checkArchive email url = when (isURI url) (alexaToolbar url >> webciteArchive email url >> alexaArchive url >> internetArchiveLive url >> wikiwixArchive url)
webciteArchive :: String -> String -> IO ()
webciteArchive email url = when (not $ "http://www.webcitation.org" `isPrefixOf` url) $
void $ openURL ("http://www.webcitation.org/archive?url=" ++ url ++ "&email=" ++ email)
where void = (>> return ())
internetArchiveLive :: String -> IO ()
internetArchiveLive url = openURL ("http://liveweb.archive.org/"++url) >> return ()
alexaArchive :: String -> IO ()
alexaArchive url = when (not $ "http://www.archive.org" `isPrefixOf` url) $
do let archiveform = Form POST
(fromJust $ parseURI "http://www.alexa.com/help/crawlrequest")
[("url", url), ("submit", "")]
(uri, resp) <- browse $ request $ formToRequest archiveform
when (uriPath uri /= "/help/crawlthanks") $
print $ "Request failed! Alexa changed webpages? Response:" ++ rspBody resp
alexaToolbar :: String -> IO ()
alexaToolbar url = do gen <- getStdGen
let rint = fst $ randomR (1000::Int,20000) gen
let payload = "wid=" ++ show rint ++ "&ref=&url=" ++ escape url
_ <- openURL $ "http://data.alexa.com/data/SbADd155Tq0000?cli=10&ver=spkyf-1.5.0&dat=ns&cdt=rq=0&" ++ payload
return ()
where escape :: String -> String
escape = concatMap escapeURIChar
escapeURIChar :: Char -> String
escapeURIChar c | isAscii c && isAlphaNum c = [c]
| otherwise = concatMap (printf "%%%02X") [c]
wikiwixArchive :: String -> IO ()
wikiwixArchive url = openURL ("http://archive.wikiwix.com/cache/?url="++url) >> return ()