-- | Decode encoded-words (RFC 2047)
module Dew(decodeEncodedWords,convertCharset,convertCharset') where
import Base64
import Dqp
import Utils2(words',strToLower,chopList,breakAt)
import Unicode(decodeUTF8)
import Data.Array

-- | Decode every encoded-word (RFC 2047) found in a string.
--
-- The input is split into pieces with @words'@, each piece is passed through
-- 'decodeWord', and the results are concatenated in their original order.
decodeEncodedWords :: String -> String
decodeEncodedWords input = concat [decodeWord piece | piece <- words' input]

-- | Decode a single word if it looks like an encoded-word, otherwise return
-- it unchanged.
--
-- The word is cut into fields with @chopList (breakAt '?')@.  It is decoded
-- only when that gives exactly five fields, the first and last of which are
-- @"="@ (presumably the @=?charset?encoding?text?=@ form of RFC 2047), and
-- none of the three middle fields is the string @"?"@.  The text field is
-- then decoded with 'decodeEtext' and converted with 'convertCharset'.
decodeWord :: String -> String
decodeWord w = fromFields (chopList (breakAt '?') w)
  where
    -- Falls through to the catch-all equation when the guard fails.
    fromFields ["=", charset, encoding, etext, "="]
      | all (/= "?") [charset, encoding, etext] =
          convertCharset charset (decodeEtext encoding etext)
    fromFields _ = w

-- | Convert a string from the named charset, using the converter that
-- 'convertCharset'' selects for that name.
--
-- When 'convertCharset'' does not know the charset, the text is passed
-- through unconverted with an @[Unknown charset ...]@ marker put in front of
-- it (the original author flagged that fallback with \"!!!\").
--
-- As the original author noted, this doesn't really belong in this module.
convertCharset :: String -> String -> String
convertCharset charset =
    case convertCharset' charset of
      Just (_, convert) -> convert
      Nothing           -> \text -> "[Unknown charset " ++ charset ++ "]" ++ text

-- | Look up the converter for a charset name.
--
-- The name is normalised with @strToLower@ before the lookup.  The result is
-- 'Just' the normalised name paired with a function that converts text in
-- that charset, or 'Nothing' when the name is not one of those listed below.
convertCharset' :: String -> Maybe (String, String -> String)
convertCharset' charset = fmap ((,) name) (lookup name converters)
  where
    name = strToLower charset
    converters =
      [ ("utf-8",        decodeUTF8)
      , ("iso-8859-1",   id)
      , ("iso-8859-15",  map latin9)
      , ("us-ascii",     id)
      , ("windows-1252", map wconv)
      ]

-- | Convert one Windows-1252 character to Unicode.
--
-- Characters in the range 0x80-0x9F are translated through the 'ws' table;
-- every other character is returned unchanged.
wconv :: Char -> Char
wconv c
  | inRange (bounds ws) c = ws ! c
  | otherwise             = c

-- | Unicode characters for the Windows-1252 code points 0x80-0x9F.
--
-- The five code points Windows-1252 leaves unassigned (0x81, 0x8D, 0x8F,
-- 0x90 and 0x9D) are mapped to a space.  The entries are spelled out as code
-- points, four per row, so that each slot can be checked against the code
-- page; the earlier string form had 0x90 and 0x91 swapped, which turned the
-- left single quotation mark (0x91) into a space.
--
-- https://sv.wikipedia.org/wiki/Windows-1252
ws :: Array Char Char
ws = listArray ('\x80', '\x9f')
  [ '\x20AC', ' '     , '\x201A', '\x0192' -- 80-83: €, unassigned, ‚, ƒ
  , '\x201E', '\x2026', '\x2020', '\x2021' -- 84-87: „ … † ‡
  , '\x02C6', '\x2030', '\x0160', '\x2039' -- 88-8B: ˆ ‰ Š ‹
  , '\x0152', ' '     , '\x017D', ' '      -- 8C-8F: Œ, unassigned, Ž, unassigned
  , ' '     , '\x2018', '\x2019', '\x201C' -- 90-93: unassigned, ‘ ’ “
  , '\x201D', '\x2022', '\x2013', '\x2014' -- 94-97: ” • – —
  , '\x02DC', '\x2122', '\x0161', '\x203A' -- 98-9B: ˜ ™ š ›
  , '\x0153', ' '     , '\x017E', '\x0178' -- 9C-9F: œ, unassigned, ž, Ÿ
  ]

-- | Convert one ISO-8859-15 (Latin-9) character to Unicode.
--
-- Only the eight code points listed here are remapped; any other character
-- is returned unchanged.
--
-- https://en.wikipedia.org/wiki/ISO/IEC_8859-15
latin9 :: Char -> Char
latin9 '¤' = '€' -- 0xA4
latin9 '¦' = 'Š' -- 0xA6
latin9 '¨' = 'š' -- 0xA8
latin9 '´' = 'Ž' -- 0xB4
latin9 '¸' = 'ž' -- 0xB8
latin9 '¼' = 'Œ' -- 0xBC
latin9 '½' = 'œ' -- 0xBD
latin9 '¾' = 'Ÿ' -- 0xBE
latin9 c   = c

-- | Decode the text part of an encoded-word according to its encoding tag.
--
-- The tag is compared after @strToLower@: @"q"@ selects 'decodeQ' and @"b"@
-- selects @decodeBase64@.  For any other tag the text is not decoded; the
-- result is the tag as given, followed by @"? "@ and the raw text.
decodeEtext :: String -> String -> String
decodeEtext encoding etext
  | tag == "q" = decodeQ etext
  | tag == "b" = decodeBase64 etext
  | otherwise  = concat [encoding, "? ", etext]
  where
    tag = strToLower encoding

-- | Decode \"Q\"-encoded text: every underscore is first replaced by a space
-- (see 'uscore2space') and the result is handed to @decodeQuotedPrintable@.
decodeQ :: String -> String
decodeQ etext = decodeQuotedPrintable [uscore2space c | c <- etext]

-- | Replace an underscore with a space; any other character is unchanged.
uscore2space :: Char -> Char
uscore2space c
  | c == '_'  = ' '
  | otherwise = c