-- Author: Andy Stewart -- Maintainer: Andy Stewart -- -- Copyright (C) 2010 Andy Stewart, all rights reserved. -- -- This program is free software: you can redistribute it and/or modify -- it under the terms of the GNU General Public License as published by -- the Free Software Foundation, either version 3 of the License, or -- any later version. -- -- This program is distributed in the hope that it will be useful, -- but WITHOUT ANY WARRANTY; without even the implied warranty of -- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -- GNU General Public License for more details. -- -- You should have received a copy of the GNU General Public License -- along with this program. If not, see . module Google.Suggest ( Language (..), suggest ) where import Data.Maybe (fromMaybe) import Network.Curl.Download (openAsXML) import Text.XML.Light (Content (Elem), filterElementsName, findAttrBy, qName) import qualified Codec.Binary.Url as Url import qualified Codec.Binary.UTF8.String as UTF8 data Language = Afrikaans | Albanian | Amharic | Arabic | Armenian | Azerbaijani | Basque | Belarusian | Bengali | Bihari | Bulgarian | Burmese | Catalan | Cherokee | Chinese | ChineseSimplified | ChineseTraditional | Croatian | Czech | Danish | Dhivehi | Dutch | English | Esperanto | Estonian | Filipino | Finnish | French | Galician | Georgian | German | Greek | Guarani | Gujarati | Hebrew | Hindi | Hungarian | Icelandic | Indonesian | Inuktitut | Italian | Japanese | Kannada | Kazakh | Khmer | Korean | Kurdish | Kyrgyz | Laothian | Latvian | Lithuanian | Macedonian | Malay | Malayalam | Maltese | Marathi | Mongolian | Nepali | Norwegian | Oriya | Pashto | Persian | Polish | Portuguese | Punjabi | Romanian | Russian | Sanskrit | Serbian | Sindhi | Sinhalese | Slovak | Slovenian | Spanish | Swahili | Swedish | Tajik | Tamil | Tagalog | Telugu | Thai | Tibetan | Turkish | Ukrainian | Urdu | Uzbek | Uighur | Vietnamese | Unknown deriving (Show, Eq, Ord, Read) -- | Get [(suggestions, queries)] from Google Suggest. suggest :: String -- ^ search term -> Maybe Language -- ^ @'Just' language@, or 'Nothing' to use 'English' default -> IO (Either String [(String, Int)]) suggest keyword language = do -- Build URL. let url = "http://google.com/complete/search?output=toolbar" -- Language. ++ ("&hl=" ++ getLanguageCode (fromMaybe English language)) -- Request text. ++ ("&q=" ++ Url.encode (UTF8.encode keyword)) -- Request XML data. string <- openAsXML url return $ case string of Right (_:Elem element:_) -> do let qNameEqual str name = qName name == str elements = filterElementsName (qNameEqual "CompleteSuggestion") element suggestions = concatMap (filterElementsName (qNameEqual "suggestion")) elements queries = concatMap (filterElementsName (qNameEqual "num_queries")) elements if length suggestions == length queries then do let names = map (UTF8.decodeString . fromMaybe "" . findAttrBy (qNameEqual "data")) suggestions nums = map (\x -> case findAttrBy (qNameEqual "int") x of Just str -> read str :: Int Nothing -> 0) queries Right $ zip names nums else Left "Parse failed." _ -> Left "Parse failed." -- | Get language code. getLanguageCode :: Language -> String getLanguageCode lang = case lang of Afrikaans -> "af" Albanian -> "sq" Amharic -> "am" Arabic -> "ar" Armenian -> "hy" Azerbaijani -> "az" Basque -> "eu" Belarusian -> "be" Bengali -> "bn" Bihari -> "bh" Bulgarian -> "bg" Burmese -> "my" Catalan -> "ca" Cherokee -> "chr" Chinese -> "zh" ChineseSimplified -> "zh-CN" ChineseTraditional -> "zh-TW" Croatian -> "hr" Czech -> "cs" Danish -> "da" Dhivehi -> "dv" Dutch -> "nl" English -> "en" Esperanto -> "eo" Estonian -> "et" Filipino -> "tl" Finnish -> "fi" French -> "fr" Galician -> "gl" Georgian -> "ka" German -> "de" Greek -> "el" Guarani -> "gn" Gujarati -> "gu" Hebrew -> "iw" Hindi -> "hi" Hungarian -> "hu" Icelandic -> "is" Indonesian -> "id" Inuktitut -> "iu" Italian -> "it" Japanese -> "ja" Kannada -> "kn" Kazakh -> "kk" Khmer -> "km" Korean -> "ko" Kurdish -> "ku" Kyrgyz -> "ky" Laothian -> "lo" Latvian -> "lv" Lithuanian -> "lt" Macedonian -> "mk" Malay -> "ms" Malayalam -> "ml" Maltese -> "mt" Marathi -> "mr" Mongolian -> "mn" Nepali -> "ne" Norwegian -> "no" Oriya -> "or" Pashto -> "ps" Persian -> "fa" Polish -> "pl" Portuguese -> "pt-PT" Punjabi -> "pa" Romanian -> "ro" Russian -> "ru" Sanskrit -> "sa" Serbian -> "sr" Sindhi -> "sd" Sinhalese -> "si" Slovak -> "sk" Slovenian -> "sl" Spanish -> "es" Swahili -> "sw" Swedish -> "sv" Tajik -> "tg" Tamil -> "ta" Tagalog -> "tl" Telugu -> "te" Thai -> "th" Tibetan -> "bo" Turkish -> "tr" Ukrainian -> "uk" Urdu -> "ur" Uzbek -> "uz" Uighur -> "ug" Vietnamese -> "vi" Unknown -> ""