module WASH.Utility.RFC2279 (encode, decode) where
import Char
encode :: String -> String
decode :: String -> String
factors = iterate (* 0x40) 1
f1 = factors !! 1
f2 = factors !! 2
f3 = factors !! 3
f4 = factors !! 4
f5 = factors !! 5
encode [] = []
encode (x:xs) =
let r0 = ord x in
if r0 < 0x80 then
x : encode xs
else if r0 < 0x800 then
let c1 = 0xC0 + r0 `div` f1
c2 = 0x80 + r0 `mod` f1
in chr c1 : chr c2 : encode xs
else if r0 < 0x10000 then
let c1 = 0xE0 + r0 `div` f2
r1 = r0 `mod` f2
c2 = 0x80 + r1 `div` f1
c3 = 0x80 + r1 `mod` f1
in chr c1 : chr c2 : chr c3 : encode xs
else if r0 < 0x200000 then
let c1 = 0xF0 + r0 `div` f3
r1 = r0 `mod` f3
c2 = 0x80 + r1 `div` f2
r2 = r1 `mod` f2
c3 = 0x80 + r2 `div` f1
c4 = 0x80 + r2 `mod` f1
in chr c1 : chr c2 : chr c3 : chr c4 : encode xs
else if r0 < 0x4000000 then
let c1 = 0xF8 + r0 `div` f4
r1 = r0 `mod` f4
c2 = 0x80 + r1 `div` f3
r2 = r1 `mod` f3
c3 = 0x80 + r2 `div` f2
r3 = r2 `mod` f2
c4 = 0x80 + r3 `div` f1
c5 = 0x80 + r3 `mod` f1
in chr c1 : chr c2 : chr c3 : chr c4 : chr c5 : encode xs
else
let c1 = 0xFC + r0 `div` f5
r1 = r0 `mod` f5
c2 = 0x80 + r1 `div` f4
r2 = r1 `mod` f4
c3 = 0x80 + r2 `div` f3
r3 = r2 `mod` f3
c4 = 0x80 + r3 `div` f2
r4 = r3 `mod` f2
c5 = 0x80 + r4 `div` f1
c6 = 0x80 + r4 `mod` f1
in chr c1 : chr c2 : chr c3 : chr c4 : chr c5 : chr c6 : encode xs
decode [] = []
decode (x : xs) =
let ordx = ord x in
if ordx < 0x80 then
x : decode xs
else if ordx < 0xC0 then
error "UTF-8 decoding out of sync"
else if ordx < 0xE0 then
decoden 1 (ordx 0xC0) xs
else if ordx < 0xF0 then
decoden 2 (ordx 0xE0) xs
else if ordx < 0xF8 then
decoden 3 (ordx 0xF0) xs
else if ordx < 0xFC then
decoden 4 (ordx 0xF8) xs
else if ordx < 0xFE then
decoden 5 (ordx 0xFC) xs
else
error "UTF-8 decoding found illegal start octet"
decoden :: Int -> Int -> String -> String
decoden 0 v xs =
chr v : decode xs
decoden n v (x : xs) =
let ordx = ord x
v' = f1 * v + ordx 0x80
in
if ordx >= 0x80 && ordx < 0xC0 then
decoden (n1) v' xs
else
error "UTF-8 decoding found illegal continuation octet"