{-# LANGUAGE DeriveDataTypeable #-}
{- | This module implements UTF-16 encoding and decoding as in RFC 2781.
     See <http://en.wikipedia.org/wiki/UTF-16> for more information.
 -}
module Data.Encoding.UTF16
    (UTF16(..)
    ) where

import Data.Encoding.Base
import Data.Encoding.ByteSink
import Data.Encoding.ByteSource
import Data.Encoding.Exception

import Control.Throws
import Data.Bits
import Data.Char
import Data.Typeable
import Data.Word

data UTF16
    = UTF16	-- ^ Decodes big and little endian, encodes big endian.
    | UTF16BE	-- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
    | UTF16LE	-- ^ Little endian decoding and encoding.
      deriving (UTF16 -> UTF16 -> Bool
(UTF16 -> UTF16 -> Bool) -> (UTF16 -> UTF16 -> Bool) -> Eq UTF16
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF16 -> UTF16 -> Bool
$c/= :: UTF16 -> UTF16 -> Bool
== :: UTF16 -> UTF16 -> Bool
$c== :: UTF16 -> UTF16 -> Bool
Eq,Int -> UTF16 -> ShowS
[UTF16] -> ShowS
UTF16 -> String
(Int -> UTF16 -> ShowS)
-> (UTF16 -> String) -> ([UTF16] -> ShowS) -> Show UTF16
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF16] -> ShowS
$cshowList :: [UTF16] -> ShowS
show :: UTF16 -> String
$cshow :: UTF16 -> String
showsPrec :: Int -> UTF16 -> ShowS
$cshowsPrec :: Int -> UTF16 -> ShowS
Show,Typeable)

readBOM :: ByteSource m => m (Either Char UTF16)
readBOM :: m (Either Char UTF16)
readBOM = do
  Char
ch <- UTF16 -> m Char
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF16
UTF16
  case Char
ch of
    Char
'\xFEFF' -> Either Char UTF16 -> m (Either Char UTF16)
forall (m :: * -> *) a. Monad m => a -> m a
return (UTF16 -> Either Char UTF16
forall a b. b -> Either a b
Right UTF16
UTF16BE)
    Char
'\xFFFE' -> Either Char UTF16 -> m (Either Char UTF16)
forall (m :: * -> *) a. Monad m => a -> m a
return (UTF16 -> Either Char UTF16
forall a b. b -> Either a b
Right UTF16
UTF16LE)
    Char
_ -> Either Char UTF16 -> m (Either Char UTF16)
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> Either Char UTF16
forall a b. a -> Either a b
Left Char
ch)
  
decodeUTF16 :: ByteSource m => (m Word16) -> m Char
decodeUTF16 :: m Word16 -> m Char
decodeUTF16 m Word16
fetch = do
  Word16
w1 <- m Word16
fetch
  if Word16
w1 Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w1 Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDFFF
    then Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
w1)
    else (if Word16
w1 Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF
            then DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter (Word16 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w1 Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftR` Int
8)))
            else (do
                   Word16
w2 <- m Word16
fetch
                   if Word16
w2 Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
< Word16
0xDC00 Bool -> Bool -> Bool
|| Word16
w2 Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
> Word16
0xDFFF
                     then DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter (Word16 -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w2 Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
`shiftR` Int
8)))
                     else let v :: Int
v = ((Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w1 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0x3FF)) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
10)
                                  Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word16 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w2 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0x3FF))
                          in Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int
vInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
0x10000)
                 )
         )

encodeUTF16 :: ByteSink m => (Word16 -> m ()) -> Char -> m ()
encodeUTF16 :: (Word16 -> m ()) -> Char -> m ()
encodeUTF16 Word16 -> m ()
push Char
ch
    | Int
valInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=Int
0xDFFF Bool -> Bool -> Bool
&& Int
valInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>=Int
0xD800 = EncodingException -> m ()
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Char -> EncodingException
HasNoRepresentation Char
ch)
    | Int
valInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=Int
0x0000FFFF = Word16 -> m ()
push (Word16 -> m ()) -> Word16 -> m ()
forall a b. (a -> b) -> a -> b
$ Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
val
    | Int
valInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=Int
0x0010FFFF = let v :: Int
v = Int
val Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
0x10000
                            w1 :: Word16
w1 = (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
v Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
10)) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
0xD800
                            w2 :: Word16
w2 = ((Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
v) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0x3FF) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.|. Word16
0xDC00
                        in Word16 -> m ()
push Word16
w1 m () -> m () -> m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Word16 -> m ()
push Word16
w2
    | Bool
otherwise = EncodingException -> m ()
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Char -> EncodingException
HasNoRepresentation Char
ch)
    where
      val :: Int
val = Char -> Int
ord Char
ch

instance Encoding UTF16 where
    encodeChar :: UTF16 -> Char -> m ()
encodeChar UTF16
UTF16LE = (Word16 -> m ()) -> Char -> m ()
forall (m :: * -> *).
ByteSink m =>
(Word16 -> m ()) -> Char -> m ()
encodeUTF16 Word16 -> m ()
forall (m :: * -> *). ByteSink m => Word16 -> m ()
pushWord16le
    encodeChar UTF16
_ = (Word16 -> m ()) -> Char -> m ()
forall (m :: * -> *).
ByteSink m =>
(Word16 -> m ()) -> Char -> m ()
encodeUTF16 Word16 -> m ()
forall (m :: * -> *). ByteSink m => Word16 -> m ()
pushWord16be
    decodeChar :: UTF16 -> m Char
decodeChar UTF16
UTF16LE = m Word16 -> m Char
forall (m :: * -> *). ByteSource m => m Word16 -> m Char
decodeUTF16 m Word16
forall (m :: * -> *). ByteSource m => m Word16
fetchWord16le
    decodeChar UTF16
_ = m Word16 -> m Char
forall (m :: * -> *). ByteSource m => m Word16 -> m Char
decodeUTF16 m Word16
forall (m :: * -> *). ByteSource m => m Word16
fetchWord16be

    encode :: UTF16 -> String -> m ()
encode UTF16
UTF16 String
str = do
      UTF16 -> Char -> m ()
forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF16
UTF16 Char
'\xFEFF'
      (Char -> m ()) -> String -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (UTF16 -> Char -> m ()
forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF16
UTF16) String
str
    encode UTF16
enc String
str = (Char -> m ()) -> String -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (UTF16 -> Char -> m ()
forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF16
enc) String
str

    decode :: UTF16 -> m String
decode UTF16
UTF16 = do
      Either Char UTF16
res <- m (Either Char UTF16)
forall (m :: * -> *). ByteSource m => m (Either Char UTF16)
readBOM
      case Either Char UTF16
res of
        Left Char
c -> do
                 String
cs <- m Bool -> m Char -> m String
forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM m Bool
forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (UTF16 -> m Char
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF16
UTF16BE)
                 String -> m String
forall (m :: * -> *) a. Monad m => a -> m a
return (Char
cChar -> ShowS
forall a. a -> [a] -> [a]
:String
cs)
        Right UTF16
bom -> UTF16 -> m String
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m String
decode UTF16
bom
    decode UTF16
enc = m Bool -> m Char -> m String
forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM m Bool
forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (UTF16 -> m Char
forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF16
enc)
    encodeable :: UTF16 -> Char -> Bool
encodeable UTF16
_ Char
c = (Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
> Char
'\xDFFF' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x10FFFF') Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
'\xD800'