{-
 Codec for de/encoding form data shipped in URL query strings
 or in POST request bodies. (application/x-www-form-urlencoded)
 (cf. RFC 3986.)
-}
module Web.Codec.URLEncoder 
       ( encodeString
       , decodeString

       , isUTF8Encoded
       , utf8Encode
       ) where

import qualified Codec.Binary.UTF8.String as UTF8 ( encodeString )
import Web.Codec.Percent ( getEncodedChar, getDecodedChar )

-- for isUTF8Encoded
import Data.Bits
import Data.Word ( Word32 )

utf8Encode :: String -> String
utf8Encode str
 | isUTF8Encoded str = str
 | otherwise         = UTF8.encodeString str

encodeString :: String -> String
encodeString str = go (utf8Encode str)
 where
  go "" = ""
  go (' ':xs) = '+':go xs
  go ('\r':'\n':xs) = '%':'0':'D':'%':'0':'A':go xs
  go ('\r':xs) = go ('\r':'\n':xs)
  go ('\n':xs) = go ('\r':'\n':xs)
  go (x:xs) = 
    case getEncodedChar x of
      Nothing -> x : go xs
      Just ss -> ss ++ go xs
      
decodeString :: String -> String
decodeString "" = ""
decodeString ('+':xs) = ' ':decodeString xs
decodeString ls@(x:xs) = 
  case getDecodedChar ls of
    Nothing -> x : decodeString xs
    Just (ch,xs1) -> ch : decodeString xs1


-- | @isUTF8Encoded str@ tries to recognize input string as being in UTF-8 form.
-- Will soon migrate to @utf8-string@.
isUTF8Encoded :: String -> Bool
isUTF8Encoded [] = True
isUTF8Encoded (x:xs) = 
  case ox of
    _ | ox < 0x80  -> isUTF8Encoded xs
      | ox > 0xff  -> False
      | ox < 0xc0  -> False
      | ox < 0xe0  -> check1
      | ox < 0xf0  -> check_byte 2 0xf 0
      | ox < 0xf8  -> check_byte 3 0x7  0x10000
      | ox < 0xfc  -> check_byte 4 0x3  0x200000
      | ox < 0xfe  -> check_byte 5 0x1  0x4000000
      | otherwise  -> False
 where
   ox = toW32 x
   
   toW32 :: Char -> Word32
   toW32 ch = fromIntegral (fromEnum ch)

   check1 = 
    case xs of
     [] -> False
     c1 : ds 
      | oc .&. 0xc0 /= 0x80 || d < 0x000080 -> False
      | otherwise -> isUTF8Encoded ds
      where
       oc = toW32 c1
       d = ((ox .&. 0x1f) `shiftL` 6) .|.  (oc .&. 0x3f)

   check_byte :: Int -> Word32 -> Word32 -> Bool
   check_byte i mask overlong = aux i xs (ox .&. mask)
      where
        aux 0 rs acc
         | overlong <= acc && 
	   acc <= 0x10ffff &&
           (acc < 0xd800 || 0xdfff < acc) &&
           (acc < 0xfffe || 0xffff < acc) = isUTF8Encoded rs
         | otherwise = False

        aux n (r:rs) acc
         | toW32 r .&. 0xc0 == 0x80 = 
	    aux (n-1) rs  (acc `shiftL` 6 .|. (toW32 r .&. 0x3f))

        aux _ _  _ = False