-- Author:     Andy Stewart <lazycat.manatee@gmail.com>
-- Maintainer: Andy Stewart <lazycat.manatee@gmail.com>
-- 
-- Copyright (C) 2010 Andy Stewart, all rights reserved.
-- 
-- This program is free software: you can redistribute it and/or modify
-- it under the terms of the GNU General Public License as published by
-- the Free Software Foundation, either version 3 of the License, or
-- any later version.
-- 
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
-- 
-- You should have received a copy of the GNU General Public License
-- along with this program.  If not, see <http://www.gnu.org/licenses/>.

{-# LANGUAGE OverloadedStrings #-}
module Language.Translate.Google (
    Language (..),
    translate
) where

import Network.Curl.Download
import Text.JSON.AttoJSON

import qualified Codec.Binary.Url as C
import qualified Data.ByteString as B

data Language = 
    Afrikaans
  | Albanian
  | Amharic
  | Arabic
  | Armenian
  | Azerbaijani
  | Basque
  | Belarusian
  | Bengali
  | Bihari
  | Bulgarian
  | Burmese
  | Catalan
  | Cherokee
  | Chinese
  | ChineseSimplified
  | ChineseTraditional
  | Croatian
  | Czech
  | Danish
  | Dhivehi
  | Dutch
  | English
  | Esperanto
  | Estonian
  | Filipino
  | Finnish
  | French
  | Galician
  | Georgian
  | German
  | Greek
  | Guarani
  | Gujarati
  | Hebrew
  | Hindi
  | Hungarian
  | Icelandic
  | Indonesian
  | Inuktitut
  | Italian
  | Japanese
  | Kannada
  | Kazakh
  | Khmer
  | Korean
  | Kurdish
  | Kyrgyz
  | Laothian
  | Latvian
  | Lithuanian
  | Macedonian
  | Malay
  | Malayalam
  | Maltese
  | Marathi
  | Mongolian
  | Nepali
  | Norwegian
  | Oriya
  | Pashto
  | Persian
  | Polish
  | Portuguese
  | Punjabi
  | Romanian
  | Russian
  | Sanskrit
  | Serbian
  | Sindhi
  | Sinhalese
  | Slovak
  | Slovenian
  | Spanish
  | Swahili
  | Swedish
  | Tajik
  | Tamil
  | Tagalog
  | Telugu
  | Thai
  | Tibetan
  | Turkish
  | Ukrainian
  | Urdu
  | Uzbek
  | Uighur
  | Vietnamese
  | Unknown
    deriving (Show, Eq, Ord, Read)

-- | Get translation from Google Translate.
translate :: B.ByteString        -- ^ the byte string need to translate
          -> Maybe Language      -- ^ source language, or 'Nothing' use automatic language detect
          -> Language            -- ^ destination language
          -> IO (Either String B.ByteString) 
translate text srcLang destLang = do
  -- Build url.
  let url = "http://ajax.googleapis.com/ajax/services/language/translate?v=1.0" 
            -- Request text. 
            ++ ("&q=" ++ C.encode (B.unpack text)) 
            -- Source language and destination language.
            ++ ("&langpair=" 
                ++ (maybe "" getLanguageCode srcLang
                    ++ "|" 
                    ++ getLanguageCode destLang))

  -- Request JSON data.
  string <- openURI url
  
  return $ 
    case string of
        Left err -> Left err
        Right jsonString -> 
            case parseJSON jsonString of
              Left err -> Left err
              Right jsvalue -> 
                  case lookupDeep ["responseData", "translatedText"] jsvalue of
                    Just (JSString str) -> Right str
                    _ -> Left "translate : parse failed."

-- | Get language code.
getLanguageCode :: Language -> String
getLanguageCode lang = 
  case lang of
    Afrikaans          -> "af"
    Albanian           -> "sq"
    Amharic            -> "am"
    Arabic             -> "ar"
    Armenian           -> "hy"
    Azerbaijani        -> "az"
    Basque             -> "eu"
    Belarusian         -> "be"
    Bengali            -> "bn"
    Bihari             -> "bh"
    Bulgarian          -> "bg"
    Burmese            -> "my"
    Catalan            -> "ca"
    Cherokee           -> "chr"
    Chinese            -> "zh"
    ChineseSimplified  -> "zh-CN"
    ChineseTraditional -> "zh-TW"
    Croatian           -> "hr"
    Czech              -> "cs"
    Danish             -> "da"
    Dhivehi            -> "dv"
    Dutch              -> "nl"
    English            -> "en"
    Esperanto          -> "eo"
    Estonian           -> "et"
    Filipino           -> "tl"
    Finnish            -> "fi"
    French             -> "fr"
    Galician           -> "gl"
    Georgian           -> "ka"
    German             -> "de"
    Greek              -> "el"
    Guarani            -> "gn"
    Gujarati           -> "gu"
    Hebrew             -> "iw"
    Hindi              -> "hi"
    Hungarian          -> "hu"
    Icelandic          -> "is"
    Indonesian         -> "id"
    Inuktitut          -> "iu"
    Italian            -> "it"
    Japanese           -> "ja"
    Kannada            -> "kn"
    Kazakh             -> "kk"
    Khmer              -> "km"
    Korean             -> "ko"
    Kurdish            -> "ku"
    Kyrgyz             -> "ky"
    Laothian           -> "lo"
    Latvian            -> "lv"
    Lithuanian         -> "lt"
    Macedonian         -> "mk"
    Malay              -> "ms"
    Malayalam          -> "ml"
    Maltese            -> "mt"
    Marathi            -> "mr"
    Mongolian          -> "mn"
    Nepali             -> "ne"
    Norwegian          -> "no"
    Oriya              -> "or"
    Pashto             -> "ps"
    Persian            -> "fa"
    Polish             -> "pl"
    Portuguese         -> "pt-PT"
    Punjabi            -> "pa"
    Romanian           -> "ro"
    Russian            -> "ru"
    Sanskrit           -> "sa"
    Serbian            -> "sr"
    Sindhi             -> "sd"
    Sinhalese          -> "si"
    Slovak             -> "sk"
    Slovenian          -> "sl"
    Spanish            -> "es"
    Swahili            -> "sw"
    Swedish            -> "sv"
    Tajik              -> "tg"
    Tamil              -> "ta"
    Tagalog            -> "tl"
    Telugu             -> "te"
    Thai               -> "th"
    Tibetan            -> "bo"
    Turkish            -> "tr"
    Ukrainian          -> "uk"
    Urdu               -> "ur"
    Uzbek              -> "uz"
    Uighur             -> "ug"
    Vietnamese         -> "vi"
    Unknown            -> ""