-- | Decode encoded-words (RFC 2047)
module Dew(decodeEncodedWords,convertCharset,convertCharset') where
import Base64
import Dqp
import Utils2(words',strToLower,chopList,breakAt)
import Unicode(decodeUTF8)
import Data.Array

-- | Decode every encoded-word found in a string.
--
-- The input is cut into pieces with 'words'', each piece is passed through
-- 'decodeWord', and the results are concatenated again.
-- NOTE(review): presumably 'words'' keeps the separating white space as
-- pieces of its own so that plain text survives unchanged -- confirm in
-- "Utils2".
decodeEncodedWords :: String -> String
decodeEncodedWords = concatMap decodeWord . words'

-- | Decode a single piece if it has the encoded-word shape
-- @=?charset?encoding?text?=@; any other piece is returned unchanged.
--
-- The piece is split with @chopList (breakAt \'?\')@ and must yield exactly
-- five fields, the first and last being @"="@, with none of the three inner
-- fields equal to @"?"@.
decodeWord :: String -> String
decodeWord w =
  case chopList (breakAt '?') w of
    ["=",charset,encoding,etext,"="]
      | "?" `notElem` [charset,encoding,etext] ->
          convertCharset charset (decodeEtext encoding etext)
    _ -> w

-- This doesn't really belong here...

-- | Convert text in the named charset to a Haskell 'String'.
--
-- When 'convertCharset'' does not know the charset, the text is returned
-- unconverted, prefixed with a @[Unknown charset ...]@ marker.
convertCharset :: String -> String -> String
convertCharset charset = maybe note snd (convertCharset' charset)
  where
    note = (("[Unknown charset "++charset++"]")++) -- !!!

-- | Look up a charset by name (compared after 'strToLower').
--
-- Returns the lower-cased name paired with a conversion function, or
-- 'Nothing' for a charset that is not in the table below.
convertCharset' :: String -> Maybe (String, String -> String)
convertCharset' charset =
    case l of
      "utf-8"        -> Just (l,decodeUTF8)
      "iso-8859-1"   -> Just (l,id)
      "iso-8859-15"  -> Just (l,map latin9)
      "us-ascii"     -> Just (l,id)
      "windows-1252" -> Just (l,map wconv)
      _              -> Nothing
  where
    l = strToLower charset

    -- Only 0x80..0x9F differ from ISO 8859-1; everything else passes through.
    wconv c = if inRange (bounds ws) c then ws!c else c

    -- Windows-1252 assignments for bytes 0x80..0x9F, in byte order.
    -- The table is spelled out as code points so that every entry's
    -- position is visible: 0x91 must be U+2018 and 0x90 is unassigned.
    -- The five bytes the code page leaves unassigned (81, 8D, 8F, 90, 9D)
    -- are mapped to a space.
    -- https://sv.wikipedia.org/wiki/Windows-1252
    ws = listArray ('\x80','\x9f')
           [ '\x20AC', ' '     , '\x201A', '\x0192' -- 80-83
           , '\x201E', '\x2026', '\x2020', '\x2021' -- 84-87
           , '\x02C6', '\x2030', '\x0160', '\x2039' -- 88-8B
           , '\x0152', ' '     , '\x017D', ' '      -- 8C-8F
           , ' '     , '\x2018', '\x2019', '\x201C' -- 90-93
           , '\x201D', '\x2022', '\x2013', '\x2014' -- 94-97
           , '\x02DC', '\x2122', '\x0161', '\x203A' -- 98-9B
           , '\x0153', ' '     , '\x017E', '\x0178' -- 9C-9F
           ]

-- | Map one ISO 8859-15 (Latin-9) byte, given as the 'Char' with the same
-- code, to the character it denotes.  Only the eight positions where
-- Latin-9 differs from ISO 8859-1 are changed.
-- https://en.wikipedia.org/wiki/ISO/IEC_8859-15
latin9 :: Char -> Char
latin9 c =
  case c of
    '\xA4' -> '\x20AC' -- A4: euro sign
    '\xA6' -> '\x0160' -- A6: S with caron
    '\xA8' -> '\x0161' -- A8: s with caron
    '\xB4' -> '\x017D' -- B4: Z with caron
    '\xB8' -> '\x017E' -- B8: z with caron
    '\xBC' -> '\x0152' -- BC: ligature OE
    '\xBD' -> '\x0153' -- BD: ligature oe
    '\xBE' -> '\x0178' -- BE: Y with diaeresis
    _      -> c

-- | Undo the transfer encoding of an encoded-word's text.
--
-- The encoding name is compared after 'strToLower': @q@ selects 'decodeQ'
-- and @b@ selects 'decodeBase64'.  For any other encoding the text is left
-- undecoded and returned after the encoding name and @"? "@.
decodeEtext :: String -> String -> String
decodeEtext encoding etext =
  case strToLower encoding of
    "q" -> decodeQ etext
    "b" -> decodeBase64 etext
    _   -> encoding++"? "++etext

-- | Decode the \"Q\" encoding: underscores stand for spaces, the rest is
-- handed to 'decodeQuotedPrintable'.
decodeQ :: String -> String
decodeQ = decodeQuotedPrintable . map uscore2space

-- | Replace an underscore by a space; leave every other character alone.
uscore2space :: Char -> Char
uscore2space '_' = ' '
uscore2space c   = c