{-# LANGUAGE DeriveDataTypeable #-}
{- | This module implements UTF-16 encoding and decoding as in RFC 2781.
     See <http://en.wikipedia.org/wiki/UTF-16> for more information.
 -}
module Data.Encoding.UTF16
    (UTF16(..)
    ) where

import Data.Encoding.Base
import Data.Encoding.ByteSink
import Data.Encoding.ByteSource
import Data.Encoding.Exception

import Control.Throws
import Data.Bits
import Data.Char
import Data.Typeable
import Data.Word

data UTF16
    = UTF16	-- ^ Decodes big and little endian, encodes big endian.
    | UTF16BE	-- ^ Big endian decoding and encoding, fails if the string isn\'t actually big endian.
    | UTF16LE	-- ^ Little endian decoding and encoding.
      deriving (UTF16 -> UTF16 -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF16 -> UTF16 -> Bool
$c/= :: UTF16 -> UTF16 -> Bool
== :: UTF16 -> UTF16 -> Bool
$c== :: UTF16 -> UTF16 -> Bool
Eq,Int -> UTF16 -> ShowS
[UTF16] -> ShowS
UTF16 -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF16] -> ShowS
$cshowList :: [UTF16] -> ShowS
show :: UTF16 -> String
$cshow :: UTF16 -> String
showsPrec :: Int -> UTF16 -> ShowS
$cshowsPrec :: Int -> UTF16 -> ShowS
Show,Typeable)

readBOM :: ByteSource m => m (Either Char UTF16)
readBOM :: forall (m :: * -> *). ByteSource m => m (Either Char UTF16)
readBOM = do
  Char
ch <- forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF16
UTF16
  case Char
ch of
    Char
'\xFEFF' -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. b -> Either a b
Right UTF16
UTF16BE)
    Char
'\xFFFE' -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. b -> Either a b
Right UTF16
UTF16LE)
    Char
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a b. a -> Either a b
Left Char
ch)
  
decodeUTF16 :: ByteSource m => (m Word16) -> m Char
decodeUTF16 :: forall (m :: * -> *). ByteSource m => m Word16 -> m Char
decodeUTF16 m Word16
fetch = do
  Word16
w1 <- m Word16
fetch
  if Word16
w1 forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w1 forall a. Ord a => a -> a -> Bool
> Word16
0xDFFF
    then forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> Char
chr forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
w1)
    else (if Word16
w1 forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF
            then forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w1 forall a. Bits a => a -> Int -> a
`shiftR` Int
8)))
            else (do
                   Word16
w2 <- m Word16
fetch
                   if Word16
w2 forall a. Ord a => a -> a -> Bool
< Word16
0xDC00 Bool -> Bool -> Bool
|| Word16
w2 forall a. Ord a => a -> a -> Bool
> Word16
0xDFFF
                     then forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w2 forall a. Bits a => a -> Int -> a
`shiftR` Int
8)))
                     else let v :: Int
v = ((forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w1 forall a. Bits a => a -> a -> a
.&. Word16
0x3FF)) forall a. Bits a => a -> Int -> a
`shiftL` Int
10)
                                  forall a. Bits a => a -> a -> a
.|. (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
w2 forall a. Bits a => a -> a -> a
.&. Word16
0x3FF))
                          in forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int
vforall a. Num a => a -> a -> a
+Int
0x10000)
                 )
         )

encodeUTF16 :: ByteSink m => (Word16 -> m ()) -> Char -> m ()
encodeUTF16 :: forall (m :: * -> *).
ByteSink m =>
(Word16 -> m ()) -> Char -> m ()
encodeUTF16 Word16 -> m ()
push Char
ch
    | Int
valforall a. Ord a => a -> a -> Bool
<=Int
0xDFFF Bool -> Bool -> Bool
&& Int
valforall a. Ord a => a -> a -> Bool
>=Int
0xD800 = forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Char -> EncodingException
HasNoRepresentation Char
ch)
    | Int
valforall a. Ord a => a -> a -> Bool
<=Int
0x0000FFFF = Word16 -> m ()
push forall a b. (a -> b) -> a -> b
$ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
val
    | Int
valforall a. Ord a => a -> a -> Bool
<=Int
0x0010FFFF = let v :: Int
v = Int
val forall a. Num a => a -> a -> a
- Int
0x10000
                            w1 :: Word16
w1 = (forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
v forall a. Bits a => a -> Int -> a
`shiftR` Int
10)) forall a. Bits a => a -> a -> a
.|. Word16
0xD800
                            w2 :: Word16
w2 = ((forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
v) forall a. Bits a => a -> a -> a
.&. Word16
0x3FF) forall a. Bits a => a -> a -> a
.|. Word16
0xDC00
                        in Word16 -> m ()
push Word16
w1 forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Word16 -> m ()
push Word16
w2
    | Bool
otherwise = forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Char -> EncodingException
HasNoRepresentation Char
ch)
    where
      val :: Int
val = Char -> Int
ord Char
ch

instance Encoding UTF16 where
    encodeChar :: forall (m :: * -> *). ByteSink m => UTF16 -> Char -> m ()
encodeChar UTF16
UTF16LE = forall (m :: * -> *).
ByteSink m =>
(Word16 -> m ()) -> Char -> m ()
encodeUTF16 forall (m :: * -> *). ByteSink m => Word16 -> m ()
pushWord16le
    encodeChar UTF16
_ = forall (m :: * -> *).
ByteSink m =>
(Word16 -> m ()) -> Char -> m ()
encodeUTF16 forall (m :: * -> *). ByteSink m => Word16 -> m ()
pushWord16be
    decodeChar :: forall (m :: * -> *). ByteSource m => UTF16 -> m Char
decodeChar UTF16
UTF16LE = forall (m :: * -> *). ByteSource m => m Word16 -> m Char
decodeUTF16 forall (m :: * -> *). ByteSource m => m Word16
fetchWord16le
    decodeChar UTF16
_ = forall (m :: * -> *). ByteSource m => m Word16 -> m Char
decodeUTF16 forall (m :: * -> *). ByteSource m => m Word16
fetchWord16be

    encode :: forall (m :: * -> *). ByteSink m => UTF16 -> String -> m ()
encode UTF16
UTF16 String
str = do
      forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF16
UTF16 Char
'\xFEFF'
      forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF16
UTF16) String
str
    encode UTF16
enc String
str = forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (forall enc (m :: * -> *).
(Encoding enc, ByteSink m) =>
enc -> Char -> m ()
encodeChar UTF16
enc) String
str

    decode :: forall (m :: * -> *). ByteSource m => UTF16 -> m String
decode UTF16
UTF16 = do
      Either Char UTF16
res <- forall (m :: * -> *). ByteSource m => m (Either Char UTF16)
readBOM
      case Either Char UTF16
res of
        Left Char
c -> do
                 String
cs <- forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF16
UTF16BE)
                 forall (m :: * -> *) a. Monad m => a -> m a
return (Char
cforall a. a -> [a] -> [a]
:String
cs)
        Right UTF16
bom -> forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m String
decode UTF16
bom
    decode UTF16
enc = forall (m :: * -> *) a. Monad m => m Bool -> m a -> m [a]
untilM forall (m :: * -> *). ByteSource m => m Bool
sourceEmpty (forall enc (m :: * -> *).
(Encoding enc, ByteSource m) =>
enc -> m Char
decodeChar UTF16
enc)
    encodeable :: UTF16 -> Char -> Bool
encodeable UTF16
_ Char
c = (Char
c forall a. Ord a => a -> a -> Bool
> Char
'\xDFFF' Bool -> Bool -> Bool
&& Char
c forall a. Ord a => a -> a -> Bool
<= Char
'\x10FFFF') Bool -> Bool -> Bool
|| Char
c forall a. Ord a => a -> a -> Bool
< Char
'\xD800'