module Data.Encoding.UTF16
(UTF16(..)
) where
import Data.Encoding.Base
import Data.Char(ord,chr)
import Data.Bits
import Data.Int
import Data.Word
import Data.ByteString
import qualified Data.ByteString.Lazy as LBS
import Prelude hiding (length)
import Control.Exception
import Data.Dynamic (toDyn)
data UTF16
= UTF16
| UTF16BE
| UTF16LE
deriving (Eq,Show)
utf16enc :: Bool -> (EncodeState,String) -> Maybe (Word8,(EncodeState,String))
utf16enc _ (Done,[]) = Nothing
utf16enc True (Done,x:xs)
| n<=0x0000FFFF = Just
(fromIntegral $ n `shiftR` 8
,(Put1 (fromIntegral $ n),xs))
| n<=0x0010FFFF = Just
(fromIntegral $ 0xD8 .|. (n' `shiftR` 18)
,(Put3 (fromIntegral $ (n' `shiftR` 10))
(fromIntegral $
0xDC .|. ((n' `shiftR` 8) .&. 0x03))
(fromIntegral n'),xs))
| otherwise = throwDyn $ HasNoRepresentation x
where
n = ord x
n' = n 0x10000
utf16enc False (Done,x:xs)
| n<=0x0000FFFF = Just
(fromIntegral $ n
,(Put1 (fromIntegral $ n `shiftR` 8),xs))
| n<=0x0010FFFF = Just
(fromIntegral n'
,(Put3 (fromIntegral $
0xDC .|. ((n' `shiftR` 8) .&. 0x03))
(fromIntegral $ (n' `shiftR` 10))
(fromIntegral $ 0xD8 .|. (n' `shiftR` 18)),xs))
| otherwise = throwDyn $ HasNoRepresentation x
where
n = ord x
n' = n 0x10000
utf16enc _ (Put3 w1 w2 w3,xs) = Just (w1,(Put2 w2 w3,xs))
utf16enc _ (Put2 w1 w2,xs) = Just (w1,(Put1 w2,xs))
utf16enc _ (Put1 w1,xs) = Just (w1,(Done,xs))
utf16dec :: Num a => Bool -> Word8 -> Word8 -> Word8 -> Word8 -> (Char,a)
utf16dec be s1 s2 s3 s4
| w1< 0xD8 || w1> 0xDF
= (chr $ ((fromIntegral w1) `shiftL` 8) .|. (fromIntegral w2),2)
| w1> 0xDB = throwDyn $ IllegalCharacter w1
| w3< 0xDC || w3>0xDF = throwDyn $ IllegalCharacter w3
| otherwise = (chr $ (((fromIntegral w1 .&. 0x03) `shiftL` 18)
.|. ((fromIntegral w2) `shiftL` 10)
.|. ((fromIntegral w3 .&. 0x03) `shiftL` 8)
.|. (fromIntegral w4)) + 0x10000,4)
where
(w1,w2,w3,w4) = if be then (s1,s2,s3,s4) else (s2,s1,s4,s3)
instance Encoding UTF16 where
encode enc str = unfoldr (utf16enc (enc/=UTF16LE)) (case enc of
UTF16 -> Put2 0xFE 0xFF
_ -> Done,str)
encodeLazy enc str = LBS.unfoldr (utf16enc (enc/=UTF16LE)) (case enc of
UTF16 -> Put2 0xFE 0xFF
_ -> Done,str)
encodable _ c = ord c <= 0x0010FFFF
decode _ str = case findByteOrder str of
Nothing -> decode' True 0
Just big -> decode' big 2
where
decode' be i = c:decode' be (i+took)
where
(c,took) = mapException (\ex -> case ex of
ErrorCall _ -> DynException (toDyn UnexpectedEnd)
_ -> ex) (utf16dec be s1 s2 s3 s4)
s1 = index str i
s2 = index str (i+1)
s3 = index str (i+2)
s4 = index str (i+3)
decodeLazy _ str = case findByteOrderLazy str of
Nothing -> decode' True 0
Just big -> decode' big 2
where
decode' be i = c:decode' be (i+took)
where
(c,took) = mapException (\ex -> case ex of
ErrorCall _ -> DynException (toDyn UnexpectedEnd)
_ -> ex) (utf16dec be s1 s2 s3 s4)
s1 = LBS.index str i
s2 = LBS.index str (i+1)
s3 = LBS.index str (i+2)
s4 = LBS.index str (i+3)
decodable _ str = case findByteOrder str of
Nothing -> check' True (length str) 0
Just big -> check' big (length str) 2
where
check' be m i
| m == i = True
| m == i+1 = False
| w1< 0xD8 || w1> 0xDF = check' be m (i+2)
| w1> 0xDB = False
| m <= i+3 = False
| w3< 0xDC || w3>0xDF = False
| otherwise = check' be m (i+4)
where
(w1,w3) = if be then (s1,s3) else (s2,s4)
s1 = index str i
s2 = index str (i+1)
s3 = index str (i+2)
s4 = index str (i+3)
findByteOrder :: ByteString -> Maybe Bool
findByteOrder str
| length str < 2 = Nothing
| w1 == 0xFE && w2 == 0xFF = Just True
| w1 == 0xFF && w2 == 0xFE = Just False
| otherwise = Nothing
where
w1 = index str 0
w2 = index str 1
findByteOrderLazy :: LBS.ByteString -> Maybe Bool
findByteOrderLazy str = case LBS.unpack (LBS.take 2 str) of
[w1,w2]
| w1 == 0xFE && w2 == 0xFF -> Just True
| w1 == 0xFF && w2 == 0xFE -> Just False
| otherwise -> Nothing
_ -> Nothing