-- |
-- Module:      $Header$
-- Description: Encoding function for Punycode
-- Copyright:   Copyright © 2011 Jon Kristensen
-- License:     BSD3
--
-- Maintainer:  jon.kristensen@pontarius.org
-- Stability:   unstable
-- Portability: portable
--
-- Uniquely and reversibly transform a Unicode string into an ASCII string.
-- 0.0.2.0 will support going the other way - from ASCII to Unicode.


-- TODO: Assmuption: All code points less than initial_n (128) are basic code
-- points; code points are unsigned.


module Data.Text.Punycode (encode) where


import Data.Text (Text)

import Data.Char (chr, intToDigit, isAlphaNum, isAscii, ord, toUpper)


base :: Int

base = 36


tmin :: Int

tmin = 1


tmax :: Int

tmax = 26


skew :: Int

skew = 38


damp :: Int

damp = 700


initial_bias :: Int

initial_bias = 72


initial_n :: Int

initial_n = 128 -- The first character after the ASCII characters


-- decode will implement the following pseudo code from Section 6.2 of RFC 3492:
--
-- let n = initial_n
-- let i = 0
-- let bias = initial_bias
-- let output = an empty string indexed from 0
-- consume all code points before the last delimiter (if there is one)
--   and copy them to output, fail on any non-basic code point
-- if more than zero code points were consumed then consume one more
--   (which will be the last delimiter)
-- while the input is not exhausted do begin
--   let oldi = i
--   let w = 1
--   for k = base to infinity in steps of base do begin
--     consume a code point, or fail if there was none to consume
--     let digit = the code point's digit-value, fail if it has none
--     let i = i + digit * w, fail on overflow
--     let t = tmin if k <= bias {+ tmin}, or
--             tmax if k >= bias + tmax, or k - bias otherwise
--     if digit < t then break
--     let w = w * (base - t), fail on overflow
--   end
--   let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
--   let n = n + i div (length(output) + 1), fail on overflow
--   let i = i mod (length(output) + 1)
--   {if n is a basic code point then fail}
--   insert n into output at position i
--   increment i
-- end


-- |
-- Encodes a Unicode label to a Punycode-encoded ASCII string. The encoding does
-- not prepend the IDNA @\"xn--\"@ prefix. The output is lower-cased.

-- TODO: Support for mixed cases (see Appendix A in the RFC)

-- encode implements the following pseudo code from Section 6.3 of RFC 3492:
--
-- let n = initial_n
-- let delta = 0
-- let bias = initial_bias
-- let h = b = the number of basic code points in the input
-- copy them to the output in order, followed by a delimiter if b > 0
-- {if the input contains a non-basic code point < n then fail}
-- while h < length(input) do begin
--   let m = the minimum {non-basic} code point >= n in the input
--   let delta = delta + (m - n) * (h + 1), fail on overflow
--   let n = m
--   for each code point c in the input (in order) do begin
--     if c < n {or c is basic} then increment delta, fail on overflow
--     if c == n then begin
--       let q = delta
--       for k = base to infinity in steps of base do begin
--         let t = tmin if k <= bias {+ tmin}, or
--                 tmax if k >= bias + tmax, or k - bias otherwise
--         if q < t then break
--         output the code point for digit t + ((q - t) mod (base - t))
--         let q = (q - t) div (base - t)
--       end
--       output the code point for digit q
--       let bias = adapt(delta, h + 1, test h equals b?)
--       let delta = 0
--       increment h
--     end
--   end
--   increment delta and n
-- end

encode :: String -> Maybe String

encode i = let n = initial_n
               delta = 0
               bias = initial_bias
               h = b
               output = if (length $ basic_codepoints) == 0
                        then ""
                        else basic_codepoints ++ ['-']
           -- TODO: Fail for non-basic code point < n
           in if b == length i then Just i else loop output n delta bias h
    where

        b :: Int
        b = length basic_codepoints

        basic_codepoints :: String
        basic_codepoints = filter isAsciiAlphaNum i

        -- while h < length(input)
        loop :: String -> Int -> Int -> Int -> Int -> Maybe String
        loop output n delta bias h
            | h < (length i) = let m = minimum $ filter (>= n) $ map ord i
                                   delta' = delta + (m - n) * (h + 1) -- TODO: Overflow?
                                   n' = m
                                   (output', delta'', h') = loop' i output n' delta' bias h
                               in loop output' (n' + 1) (delta'' + 1) bias h'
            | otherwise = Just output

        -- for each code point
        loop' :: String -> String -> Int -> Int -> Int -> Int -> (String, Int, Int)
        loop' [] output _ delta _ h = (output, delta, h)
        loop' (c:cs) output n delta bias h
            | ord c < n = loop' cs output n delta bias h
            | ord c == n = let q = delta
                               (output', q') = loop'' base output bias q
                               -- TODO: output'' seems to add an unnecessary
                               -- character... I'll investigate this more
                               -- tomorrow
                               output'' = output' ++ [toDigitValue q']
                           in loop' cs output'' n 0 (adapt delta (h + 1) (h == b)) (h + 1)

        -- for k = base...
        loop'' :: Int -> String -> Int -> Int -> (String, Int)
        loop'' k output bias q
            | q < t = (output, q)
            | otherwise = let output' = output ++ [toDigitValue (t + ((q - t) `mod` (base - t)))]
                              q' = (q - t) `div` (base - t)
                          in loop'' (k + base) output' bias q'
            where
                t = if k <= bias + tmin then tmin else if k >= bias + tmax then tmax else k - bias
                          -- TODO: ^^^^


-- Returns true if the Char is ASCII, alpha-numeric, or a hyphen.

-- TODO: Ugly function

isAsciiAlphaNum :: Char -> Bool

isAsciiAlphaNum '-' = True

isAsciiAlphaNum c = isAscii c && isAlphaNum c


-- adapt implements the following pseudo code from Section 6.1 of RFC 3492:
--
-- function adapt(delta,numpoints,firsttime):
--   if firsttime then let delta = delta div damp
--   else let delta = delta div 2
--   let delta = delta + (delta div numpoints)
--   let k = 0
--   while delta > ((base - tmin) * tmax) div 2 do begin
--     let delta = delta div (base - tmin)
--     let k = k + base
--   end
--   return k + (((base - tmin + 1) * delta) div (delta + skew))

adapt :: Int -> Int -> Bool -> Int

adapt d n f = loop 0 d'
    where

        d' :: Int
        d' = if f then d `div` damp else d `div` 2 + d `div` n

        loop :: Int -> Int -> Int
        loop k d''
            | d'' > ((base - tmin) * tmax) `div` 2 = loop (k + base)
                (d'' `div` (base - tmin))
            | otherwise = k + ((base - tmin + 1) * d'') `div` (d'' + skew)


-- Maps a character to its Punycode digit value; 0-25 for characters, 26-35 for
-- digits.

-- toDigitValue :: Char -> Int
--
-- toDigitValue c | c `elem` ['0'..'9'] = digitToInt c + 30
--                | toUpper c `elem` ['A'..'Z'] = ord (toUpper c) - 65


-- The 0-25 digits are mapped to 'a'-'z', 26-35 to '0'-'9'.

toDigitValue :: Int -> Char

toDigitValue i | i >= 0 && i <= 25 = chr $ 97 + i
               | i >= 26 && i <= 35 = intToDigit $ i - 26