module Foundation.String.Encoding.UTF16
( UTF16(..)
, UTF16_Invalid(..)
) where
import Foundation.Internal.Base
import Foundation.Internal.Types
import Foundation.Primitive.Monad
import GHC.Prim
import GHC.Word
import GHC.Types
import Foundation.Numerical
import Data.Bits
import qualified Prelude
import Foundation.Array.Unboxed
import Foundation.Collection.Buildable
import Foundation.String.Encoding.Encoding
data UTF16_Invalid
= InvalidContinuation
| InvalidUnicode Char
deriving (Show, Eq, Typeable)
instance Exception UTF16_Invalid
data UTF16 = UTF16
instance Encoding UTF16 where
type Unit UTF16 = Word16
type Error UTF16 = UTF16_Invalid
encodingNext _ = next
encodingWrite _ = write
next :: (Offset Word16 -> Word16)
-> Offset Word16
-> Either UTF16_Invalid (Char, Offset Word16)
next getter off
| h < 0xd800 = Right (toChar hh, off + Offset 1)
| h >= 0xe000 = Right (toChar hh, off + Offset 1)
| otherwise = nextContinuation
where
h :: Word16
!h@(W16# hh) = getter off
toChar :: Word# -> Char
toChar w = C# (chr# (word2Int# w))
to32 :: Word16 -> Word32
to32 (W16# w) = W32# w
nextContinuation
| cont >= 0xdc00 && cont < 0xe00 =
let !(W32# w) = ((to32 h .&. 0x3ff) `shiftL` 10)
.|. (to32 cont .&. 0x3ff)
in Right (toChar w, off + Offset 2)
| otherwise = Left InvalidContinuation
where
cont :: Word16
!cont = getter $ off + Offset 1
write :: (PrimMonad st, Monad st)
=> Char
-> Builder (UArray Word16) st ()
write c
| c < toEnum 0xd800 = append $ w16 c
| c > toEnum 0x10000 = let (w1, w2) = wHigh c in append w1 >> append w2
| c > toEnum 0x10ffff = throw $ InvalidUnicode c
| c >= toEnum 0xe000 = append $ w16 c
| otherwise = throw $ InvalidUnicode c
where
w16 :: Char -> Word16
w16 (C# ch) = W16# (int2Word# (ord# ch))
to16 :: Word32 -> Word16
to16 = Prelude.fromIntegral
wHigh :: Char -> (Word16, Word16)
wHigh (C# ch) =
let v = W32# (minusWord# (int2Word# (ord# ch)) 0x10000##)
in (0xdc00 .|. to16 (v `shiftR` 10), 0xd800 .|. to16 (v .&. 0x3ff))