-- Author:     Andy Stewart <lazycat.manatee@gmail.com>
-- Maintainer: Andy Stewart <lazycat.manatee@gmail.com>
-- 
-- Copyright (C) 2010 Andy Stewart, all rights reserved.
-- 
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- any later version.
-- 
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
-- 
-- You should have received a copy of the GNU General Public License
-- along with this program.  If not, see <http://www.gnu.org/licenses/>.

module Google.Suggest (
    Language (..),
    suggest
) where

import Data.Maybe (fromMaybe)
import Network.Curl.Download (openAsXML)
import Text.XML.Light (Content (Elem), filterElementsName, findAttrBy, qName)

import qualified Codec.Binary.Url as Url
import qualified Codec.Binary.UTF8.String as UTF8

data Language = 
    Afrikaans
  | Albanian
  | Amharic
  | Arabic
  | Armenian
  | Azerbaijani
  | Basque
  | Belarusian
  | Bengali
  | Bihari
  | Bulgarian
  | Burmese
  | Catalan
  | Cherokee
  | Chinese
  | ChineseSimplified
  | ChineseTraditional
  | Croatian
  | Czech
  | Danish
  | Dhivehi
  | Dutch
  | English
  | Esperanto
  | Estonian
  | Filipino
  | Finnish
  | French
  | Galician
  | Georgian
  | German
  | Greek
  | Guarani
  | Gujarati
  | Hebrew
  | Hindi
  | Hungarian
  | Icelandic
  | Indonesian
  | Inuktitut
  | Italian
  | Japanese
  | Kannada
  | Kazakh
  | Khmer
  | Korean
  | Kurdish
  | Kyrgyz
  | Laothian
  | Latvian
  | Lithuanian
  | Macedonian
  | Malay
  | Malayalam
  | Maltese
  | Marathi
  | Mongolian
  | Nepali
  | Norwegian
  | Oriya
  | Pashto
  | Persian
  | Polish
  | Portuguese
  | Punjabi
  | Romanian
  | Russian
  | Sanskrit
  | Serbian
  | Sindhi
  | Sinhalese
  | Slovak
  | Slovenian
  | Spanish
  | Swahili
  | Swedish
  | Tajik
  | Tamil
  | Tagalog
  | Telugu
  | Thai
  | Tibetan
  | Turkish
  | Ukrainian
  | Urdu
  | Uzbek
  | Uighur
  | Vietnamese
  | Unknown
    deriving (Show, Eq, Ord, Read)

-- | Get [(suggestions, queries)] from Google Suggest.
suggest :: String                -- ^ search term
        -> Maybe Language        -- ^ @'Just' language@, or 'Nothing' to use 'English' default
        -> IO (Either String [(String, Int)])
suggest keyword language = do
  -- Build URL.
  let url = "http://google.com/complete/search?output=toolbar" 
            -- Language.
            ++ ("&hl=" ++ getLanguageCode (fromMaybe English language))
            -- Request text. 
            ++ ("&q=" ++ Url.encode (UTF8.encode keyword)) 

  -- Request XML data.
  string <- openAsXML url

  return $
      case string of
        Right (_:Elem element:_) -> do
          let qNameEqual str name = qName name == str
              elements    = filterElementsName (qNameEqual "CompleteSuggestion") element
              suggestions = concatMap (filterElementsName (qNameEqual "suggestion")) elements
              queries     = concatMap (filterElementsName (qNameEqual "num_queries")) elements
          if length suggestions == length queries
             then do
               let names = map (UTF8.decodeString . fromMaybe "" . findAttrBy (qNameEqual "data")) suggestions 
                   nums  = map (\x -> case findAttrBy (qNameEqual "int") x of
                                       Just str -> read str :: Int
                                       Nothing -> 0) queries
               Right $ zip names nums
             else Left "Parse failed."
        _ ->  Left "Parse failed."

-- | Get language code.
getLanguageCode :: Language -> String
getLanguageCode lang = 
  case lang of
    Afrikaans          -> "af"
    Albanian           -> "sq"
    Amharic            -> "am"
    Arabic             -> "ar"
    Armenian           -> "hy"
    Azerbaijani        -> "az"
    Basque             -> "eu"
    Belarusian         -> "be"
    Bengali            -> "bn"
    Bihari             -> "bh"
    Bulgarian          -> "bg"
    Burmese            -> "my"
    Catalan            -> "ca"
    Cherokee           -> "chr"
    Chinese            -> "zh"
    ChineseSimplified  -> "zh-CN"
    ChineseTraditional -> "zh-TW"
    Croatian           -> "hr"
    Czech              -> "cs"
    Danish             -> "da"
    Dhivehi            -> "dv"
    Dutch              -> "nl"
    English            -> "en"
    Esperanto          -> "eo"
    Estonian           -> "et"
    Filipino           -> "tl"
    Finnish            -> "fi"
    French             -> "fr"
    Galician           -> "gl"
    Georgian           -> "ka"
    German             -> "de"
    Greek              -> "el"
    Guarani            -> "gn"
    Gujarati           -> "gu"
    Hebrew             -> "iw"
    Hindi              -> "hi"
    Hungarian          -> "hu"
    Icelandic          -> "is"
    Indonesian         -> "id"
    Inuktitut          -> "iu"
    Italian            -> "it"
    Japanese           -> "ja"
    Kannada            -> "kn"
    Kazakh             -> "kk"
    Khmer              -> "km"
    Korean             -> "ko"
    Kurdish            -> "ku"
    Kyrgyz             -> "ky"
    Laothian           -> "lo"
    Latvian            -> "lv"
    Lithuanian         -> "lt"
    Macedonian         -> "mk"
    Malay              -> "ms"
    Malayalam          -> "ml"
    Maltese            -> "mt"
    Marathi            -> "mr"
    Mongolian          -> "mn"
    Nepali             -> "ne"
    Norwegian          -> "no"
    Oriya              -> "or"
    Pashto             -> "ps"
    Persian            -> "fa"
    Polish             -> "pl"
    Portuguese         -> "pt-PT"
    Punjabi            -> "pa"
    Romanian           -> "ro"
    Russian            -> "ru"
    Sanskrit           -> "sa"
    Serbian            -> "sr"
    Sindhi             -> "sd"
    Sinhalese          -> "si"
    Slovak             -> "sk"
    Slovenian          -> "sl"
    Spanish            -> "es"
    Swahili            -> "sw"
    Swedish            -> "sv"
    Tajik              -> "tg"
    Tamil              -> "ta"
    Tagalog            -> "tl"
    Telugu             -> "te"
    Thai               -> "th"
    Tibetan            -> "bo"
    Turkish            -> "tr"
    Ukrainian          -> "uk"
    Urdu               -> "ur"
    Uzbek              -> "uz"
    Uighur             -> "ug"
    Vietnamese         -> "vi"
    Unknown            -> ""