{- |This module provides support for Google Safe Browsing API (). To use this module you need to obtain access key from project's page. Typical use of this module will look like: > Right mhsh <- updateHash your_key makeEmptyMalwareHash > checkURL mhsh "http://kaishi2009.com/" > checkURL mhsh ... It's better to save hashes between runs to avoid of full updates every time. So at the end of program it's better to store hash on disk with following code: > saveHash "malware.dat" mhsh And during startup, load it from file, if it exists on disk: > Right mhsh <- load Hash "malware.dat" -} module GoogleSB (Hash, updateHash, loadHash, saveHash, checkURL, makeEmptyBlackHash, makeEmptyMalwareHash ) where import IO import Network.URI import Network.Browser import Network.HTTP import Data.List.Split (splitOn) import Data.List (intercalate, nub, foldl', elem, delete, intersect, sort) import Data.Digest.MD5 (hash) import Data.Char (ord, chr, toLower) import Codec.Utils import Numeric (showHex, readHex) import Data.Binary import Control.Monad import qualified Control.Exception as Ex import Control.Monad.Error {- |Data structure to store data about Hash. It contains information about version, name of hash, and actual Hash data -} data Hash = Hash { versionMajor :: Int -- ^Major part of version number , versionMinor :: Int -- ^Minor part of version number , name :: String -- ^Hash name (see documentation about Google SB) , hashes :: [Integer] -- ^Actual data } deriving (Show, Eq) instance Binary Hash where put (Hash vmj vmn name hsh) = put vmj >> put vmn >> put name >> put hsh get = liftM4 Hash get get get get -- |Creates empty 'black' Hash object makeEmptyBlackHash :: Hash makeEmptyBlackHash = Hash { versionMajor = 1, versionMinor= -1, hashes= [], name = "goog-black-hash"} -- |Creates empty 'malware' Hash object makeEmptyMalwareHash :: Hash makeEmptyMalwareHash = Hash { versionMajor = 1, versionMinor= -1, hashes= [], name = "goog-malware-hash"} {- |Performs update of Hash from server. For first update, you need to provide hash created by 'makeEmptyBlackHash' or 'makeEmptyMalwareHash'. And for later updates, you will use existing hashes to get only updates to hashes, not the full database. -} updateHash :: String -- ^Access key for Google SB. See module description -> Hash -- ^Hash to update. Use 'makeEmptyBlackHash' or 'makeEmptyMalwareHash' for first update -> IO (Either String Hash) {- ^Result of update: 'Left String' if error happens (String contains error message), or 'Right Hash' on success -} updateHash key hash = Ex.handle (makeHandler "Hash update error: ") $ do hdata <- downloadHash key hash return $ case hdata of Left err -> Left err; Right str -> parseHash hash str {- |Loads Hash from given file -} loadHash :: FilePath -- ^File from which Hash will loaded -> IO (Either String Hash) {- ^Result of loading: 'Left String' if error happens (String contains error message), or 'Right Hash' on success -} loadHash fname = Ex.handle (makeHandler "Hash load error: ") $ do hash <- decodeFile fname return (Right hash) -- |Saves Hash into given file saveHash :: FilePath -- ^File in which Hash will stored -> Hash -- ^Hash to store -> IO () saveHash = encodeFile {- |Performs checking of presence of given URL (second argument of function) in the Hash (first argument). -} checkURL :: Hash -- ^Hash to check against -> String -- ^URL to check -> Bool -- ^True, if URL exists in database, False - otherwise checkURL hsh url = not $ null sl where hl = map (\ x -> octets2int $ hash $ str2octets x) $ generateURLVariants url set = hashes hsh sl = intersect set hl -- Auxiliary functions -- makeHandler :: (Monad m) => String -> Ex.SomeException -> m (Either String Hash) makeHandler str ex = return (Left $ str ++ show (ex::Ex.SomeException)) -- makeSBURL :: String -> Hash -> String makeSBURL key hash = "http://sb.google.com/safebrowsing/update?client=api&apikey=" ++ key ++ "&version=" ++ name hash ++ ":" ++ (show $ versionMajor hash) ++ ":" ++ (show $ versionMinor hash) -- downloadHash :: String -> Hash -> IO (Either String String) downloadHash key hash = do rspO <- browse $ do setAllowRedirects True request $ getRequest $ makeSBURL key hash let rsp = snd rspO code = rspCode rsp return $ case code of (2,_,_) -> Right $ rspBody rsp; _ -> Left "Error fetching data" -- getHashVersion :: String -> (Int,Int) getHashVersion ('[':xs) | last xs == ']' = let vs = splitOn "." $ head $ tail $ splitOn " " $ init xs in if null vs then (-1,-1) else (read (head vs)::Int, read (last vs)::Int) | otherwise = (-1,-1) getHashVersion _ = (-1, -1) parseHash :: Hash -> String -> (Either String Hash) parseHash h [] = Right h parseHash h d = let l = lines d (vmj,vmn) = getHashVersion $ head l stripTab s = if last s == '\t' then init s else s convString = fst . head . readHex . stripTab loop h' [""] = h' loop h' [] = h' loop h' (x:xs) | head x == '-' = loop (delete (convString $ tail x) h') xs | head x == '+' = loop ((convString $ tail x) `seq` (convString $ tail x) : h') xs | otherwise = loop ((convString x) `seq` (convString x) : h') xs in if null l || vmj == -1 || vmn == -1 then Left "Hash parse error" else Right h { hashes = sort $ loop (hashes h) $ tail l, versionMajor = vmj, versionMinor = vmn} str2octets :: String -> [Octet] str2octets = concatMap (\ x -> toTwosComp $ ord x) -- octets2int :: [Octet] -> Integer octets2int = foldl' (\x y -> x*256 + fromIntegral y) 0 -- lowercase :: String -> String lowercase = map (\x -> toLower x) -- generateURLVariants :: String -> [String] generateURLVariants uri = let u = parseURI uri in case u of Nothing -> [""] Just r -> nub [ h ++ p | h <- generateHosts (uriAuthority r), p <- generatePaths (uriPath r) (uriQuery r)] -- generateHosts :: (Maybe URIAuth) -> [String] generateHosts Nothing = [] generateHosts (Just auth) = h:nl where h = lowercase $ uriRegName auth hl=reverse $ splitOn "." h nl=map (\ x -> intercalate "." $ reverse $ take x hl) [2..(min 5 $ length hl -1)] -- length $ generateURLVariants "http://a.b.c.d.e.f.g/1.html" == 10 -- length $ generateURLVariants "http://a.b.c/1/2.html?param=1" == 8 -- generatePaths :: String -> String -> [String] generatePaths path [] = path:nl where np = normalizePathSegments path pl = drop 1 $ splitOn "/" np plLenght = length pl lastChar x = if x == 0 || x == plLenght then "" else "/" nl = map (\ x -> '/':(intercalate "/" $ take x pl) ++ lastChar x) [0..(min 3 plLenght)] generatePaths path query = (path ++ query) : generatePaths path []