-- |
-- Module      : Foundation.String.Encoding.UTF16
-- License     : BSD-style
-- Maintainer  : Foundation
-- Stability   : experimental
-- Portability : portable
--
{-# LANGUAGE MagicHash #-}
module Foundation.String.Encoding.UTF16
    ( UTF16(..)
    , UTF16_Invalid(..)
    ) where

import Foundation.Internal.Base
import Foundation.Internal.Types
import Foundation.Primitive.Monad
import GHC.Prim
import GHC.Word
import GHC.Types
import Foundation.Numerical
import Data.Bits
import qualified Prelude
import Foundation.Array.Unboxed
import Foundation.Collection.Buildable

import Foundation.String.Encoding.Encoding

data UTF16_Invalid
    = InvalidContinuation
    | InvalidUnicode Char
  deriving (Show, Eq, Typeable)
instance Exception UTF16_Invalid

data UTF16 = UTF16

instance Encoding UTF16 where
    type Unit UTF16 = Word16
    type Error UTF16 = UTF16_Invalid
    encodingNext  _ = next
    encodingWrite _ = write


--
-- U+0000 to U+D7FF and U+E000 to U+FFFF : 1 bytes
-- U+10000 to U+10FFFF :
--    * 0x010000 is subtracted from the code point, leaving a 20-bit number in the range 0..0x0FFFFF.
--    * The top ten bits (a number in the range 0..0x03FF) are added to 0xD800 to give the first 16-bit code unit
--      or high surrogate, which will be in the range 0xD800..0xDBFF.
--    * The low ten bits (also in the range 0..0x03FF) are added to 0xDC00 to give the second 16-bit code unit
--      or low surrogate, which will be in the range 0xDC00..0xDFFF.

next :: (Offset Word16 -> Word16)
     -> Offset Word16
     -> Either UTF16_Invalid (Char, Offset Word16)
next getter off
    | h <  0xd800 = Right (toChar hh, off + Offset 1)
    | h >= 0xe000 = Right (toChar hh, off + Offset 1)
    | otherwise   = nextContinuation
  where
    h :: Word16
    !h@(W16# hh) = getter off
    toChar :: Word# -> Char
    toChar w = C# (chr# (word2Int# w))
    to32 :: Word16 -> Word32
    to32 (W16# w) = W32# w

    nextContinuation
        | cont >= 0xdc00 && cont < 0xe00 =
            let !(W32# w) = ((to32 h .&. 0x3ff) `shiftL` 10)
                         .|. (to32 cont .&. 0x3ff)
             in Right (toChar w, off + Offset 2)
        | otherwise = Left InvalidContinuation
      where
        cont :: Word16
        !cont = getter $ off + Offset 1

write :: (PrimMonad st, Monad st)
      => Char
      -> Builder (UArray Word16) st ()
write c
    | c < toEnum 0xd800   = append $ w16 c
    | c > toEnum 0x10000  = let (w1, w2) = wHigh c in append w1 >> append w2
    | c > toEnum 0x10ffff = throw $ InvalidUnicode c
    | c >= toEnum 0xe000  = append $ w16 c
    | otherwise = throw $ InvalidUnicode c
  where
    w16 :: Char -> Word16
    w16 (C# ch) = W16# (int2Word# (ord# ch))

    to16 :: Word32 -> Word16
    to16 = Prelude.fromIntegral

    wHigh :: Char -> (Word16, Word16)
    wHigh (C# ch) =
        let v = W32# (minusWord# (int2Word# (ord# ch)) 0x10000##)
         in (0xdc00 .|. to16 (v `shiftR` 10), 0xd800 .|. to16 (v .&. 0x3ff))