{- |This module provides support for Google Safe Browsing API (<http://code.google.com/apis/safebrowsing/>).
   To use this module you need to obtain access key from project's page.

  Typical use of this module will look like:

>  Right mhsh <- updateHash your_key makeEmptyMalwareHash
>  checkURL mhsh "http://kaishi2009.com/"
>  checkURL mhsh ...

It's better to save hashes between runs to avoid of full updates every time.  So at the end of program it's better to store hash on disk with following code:

>  saveHash "malware.dat" mhsh

And during startup, load it from file, if it exists on disk:

>  Right mhsh <- load Hash "malware.dat"

-}
module GoogleSB
    (Hash,
     updateHash,
     loadHash,
     saveHash,
     checkURL,
     makeEmptyBlackHash,
     makeEmptyMalwareHash
    )
    where

import IO
import Network.URI
import Network.Browser
import Network.HTTP
import Data.List.Split (splitOn)
import Data.List (intercalate, nub, foldl', elem, delete, intersect, sort)
import Data.Digest.MD5 (hash)
import Data.Char (ord, chr, toLower)
import Codec.Utils
import Numeric (showHex, readHex)
import Data.Binary
import Control.Monad
import qualified Control.Exception as Ex
import Control.Monad.Error


{- |Data structure to store data about Hash. It contains information about version, name
    of hash, and actual Hash data -}
data Hash = Hash {
      versionMajor :: Int -- ^Major part of version number
    , versionMinor :: Int -- ^Minor part of version number
    , name :: String      -- ^Hash name (see documentation about Google SB)
    , hashes :: [Integer] -- ^Actual data
    } deriving (Show, Eq)

instance Binary Hash where
    put (Hash vmj vmn name hsh) = put vmj >> put vmn >> put name >> put hsh
    get = liftM4 Hash get get get get

-- |Creates empty 'black' Hash object
makeEmptyBlackHash :: Hash
makeEmptyBlackHash = Hash { versionMajor = 1, versionMinor= -1, hashes= [], name = "goog-black-hash"}

-- |Creates empty 'malware' Hash object
makeEmptyMalwareHash :: Hash
makeEmptyMalwareHash = Hash { versionMajor = 1, versionMinor= -1, hashes= [], name = "goog-malware-hash"}

{- |Performs update of Hash from server.  For first update, you need to provide hash
 created by 'makeEmptyBlackHash' or 'makeEmptyMalwareHash'.  And for later updates, you
 will use existing hashes to get only updates to hashes, not the full database. -}
updateHash :: String -- ^Access key for Google SB. See module description
           -> Hash -- ^Hash to update.  Use 'makeEmptyBlackHash' or 'makeEmptyMalwareHash' for first update
           -> IO (Either String Hash) {- ^Result of update: 'Left String' if error happens
                                          (String contains error message), or 'Right Hash' on success -}
updateHash key hash = Ex.handle (makeHandler "Hash update error: ") $ do
                           hdata <- downloadHash key hash
                           return $ case hdata of
                                      Left err  -> Left err;
                                      Right str -> parseHash hash str

{- |Loads Hash from given file -}
loadHash :: FilePath -- ^File from which Hash will loaded
         -> IO (Either String Hash) {- ^Result of loading: 'Left String' if error happens
                                       (String contains error message), or 'Right Hash' on success -}
loadHash fname = Ex.handle (makeHandler "Hash load error: ") $ do
                   hash <- decodeFile fname
                   return (Right hash)

-- |Saves Hash into given file
saveHash :: FilePath -- ^File in which Hash will stored
         -> Hash -- ^Hash to store
         -> IO ()
saveHash = encodeFile

{- |Performs checking of presence of given URL (second argument of function) in the Hash (first argument). -}
checkURL :: Hash -- ^Hash to check against
         -> String -- ^URL to check
         -> Bool -- ^True, if URL exists in database, False - otherwise
checkURL hsh url = not $ null sl
    where hl = map (\ x -> octets2int $ hash $ str2octets x) $ generateURLVariants url
          set = hashes hsh
          sl = intersect set hl

-- Auxiliary functions

--
makeHandler :: (Monad m) => String -> Ex.SomeException -> m (Either String Hash)
makeHandler str ex = return (Left $ str ++ show (ex::Ex.SomeException))

--
makeSBURL :: String -> Hash -> String
makeSBURL key hash = "http://sb.google.com/safebrowsing/update?client=api&apikey=" ++ key
                   ++ "&version=" ++ name hash
                   ++ ":" ++ (show $ versionMajor hash) ++ ":" ++ (show $ versionMinor hash)

--
downloadHash :: String -> Hash -> IO (Either String String)
downloadHash key hash = do
  rspO <- browse $ do
            setAllowRedirects True
            request $ getRequest $ makeSBURL key hash
  let rsp = snd rspO
      code = rspCode rsp
  return $ case code of
             (2,_,_) -> Right $ rspBody rsp;
             _       -> Left "Error fetching data"

--
getHashVersion :: String -> (Int,Int)
getHashVersion ('[':xs) | last xs == ']' =
                            let vs = splitOn "." $ head $ tail $ splitOn " " $ init xs
                            in if null vs
                               then (-1,-1)
                               else (read (head vs)::Int, read (last vs)::Int)
                        | otherwise = (-1,-1)
getHashVersion _ = (-1, -1)

parseHash :: Hash -> String -> (Either String Hash)
parseHash h [] = Right h
parseHash h d =
    let l = lines d
        (vmj,vmn) = getHashVersion $ head l
        stripTab s = if last s == '\t' then init s else s
        convString = fst . head . readHex . stripTab
        loop h' [""] = h'
        loop h' [] = h'
        loop h' (x:xs) | head x == '-' = loop (delete (convString $ tail x)  h') xs
                       | head x == '+' = loop ((convString $ tail x) `seq` (convString $ tail x) : h') xs
                       | otherwise = loop ((convString x) `seq` (convString x) : h') xs
    in if null l || vmj == -1 || vmn == -1
       then Left "Hash parse error"
       else Right h { hashes = sort $ loop (hashes h) $ tail l,
                      versionMajor = vmj,
                      versionMinor = vmn}


str2octets :: String -> [Octet]
str2octets = concatMap (\ x -> toTwosComp $ ord x)

--
octets2int :: [Octet] -> Integer
octets2int = foldl' (\x y -> x*256 + fromIntegral y) 0

--
lowercase :: String -> String
lowercase = map (\x -> toLower x)

--
generateURLVariants :: String -> [String]
generateURLVariants uri =
    let u = parseURI uri
    in case u of
         Nothing -> [""]
         Just r -> nub [ h ++ p | h <- generateHosts (uriAuthority r),
                                  p <- generatePaths (uriPath r) (uriQuery r)]

--
generateHosts :: (Maybe URIAuth) -> [String]
generateHosts Nothing = []
generateHosts (Just auth) = h:nl
    where h = lowercase $ uriRegName auth
          hl=reverse $ splitOn "." h
          nl=map (\ x -> intercalate "." $ reverse $ take x hl) [2..(min 5 $ length hl -1)]

-- length $ generateURLVariants "http://a.b.c.d.e.f.g/1.html" == 10
-- length $ generateURLVariants "http://a.b.c/1/2.html?param=1" == 8

--
generatePaths :: String -> String -> [String]
generatePaths path [] = path:nl
    where np = normalizePathSegments path
          pl = drop 1 $ splitOn "/" np
          plLenght = length pl
          lastChar x = if x == 0 || x == plLenght then "" else "/"
          nl = map (\ x -> '/':(intercalate "/" $ take x pl) ++ lastChar x) [0..(min 3 plLenght)]
generatePaths path query = (path ++ query) : generatePaths path []