module Data.Encoding.UTF32
(UTF32(..))
where
import Data.Bits
import Data.Char (ord,chr)
import Data.Encoding.Base
import Data.Word
import Control.Exception (throwDyn)
import Data.Typeable
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as LBS
data UTF32
= UTF32
| UTF32BE
| UTF32LE
deriving (Eq,Show,Typeable)
bom :: Char
bom = '\xFEFF'
instance Encoding UTF32 where
encode UTF32 str = encodeMultibyte encodeUTF32be (bom:str)
encode UTF32LE str = encodeMultibyte encodeUTF32le str
encode UTF32BE str = encodeMultibyte encodeUTF32be str
encodeLazy UTF32 str = encodeMultibyteLazy encodeUTF32be (bom:str)
encodeLazy UTF32LE str = encodeMultibyteLazy encodeUTF32le str
encodeLazy UTF32BE str = encodeMultibyteLazy encodeUTF32be str
encodable _ c = ord c < 0x0010FFFF
decode UTF32 str = let
(start,rest) = BS.splitAt 4 str
in case BS.unpack start of
[0x00,0x00,0xFE,0xFF] -> decode UTF32BE rest
[0xFE,0xFF,0x00,0x00] -> decode UTF32LE rest
_ -> decode UTF32BE str
decode UTF32LE str = decodeMultibyte decodeUTF32le str
decode UTF32BE str = decodeMultibyte decodeUTF32be str
decodeLazy UTF32 str = let
(start,rest) = LBS.splitAt 4 str
in case LBS.unpack start of
[0x00,0x00,0xFE,0xFF] -> decodeLazy UTF32BE rest
[0xFE,0xFF,0x00,0x00] -> decodeLazy UTF32LE rest
_ -> decodeLazy UTF32BE str
decodeLazy UTF32LE str = decodeMultibyteLazy decodeUTF32le str
decodeLazy UTF32BE str = decodeMultibyteLazy decodeUTF32be str
encodeUTF32be :: Char -> (Word8,EncodeState)
encodeUTF32be ch = let
w = ord ch
w1 = fromIntegral $ w `shiftR` 24
w2 = fromIntegral $ w `shiftR` 16
w3 = fromIntegral $ w `shiftR` 8
w4 = fromIntegral $ w
in (w1,Put3 w2 w3 w4)
encodeUTF32le :: Char -> (Word8,EncodeState)
encodeUTF32le ch = let
w = ord ch
w1 = fromIntegral $ w `shiftR` 24
w2 = fromIntegral $ w `shiftR` 16
w3 = fromIntegral $ w `shiftR` 8
w4 = fromIntegral $ w
in (w4,Put3 w3 w2 w1)
decodeUTF32be :: [Word8] -> (Char,[Word8])
decodeUTF32be (w1:w2:w3:w4:rest) = let
v = (fromIntegral w1 `shiftL` 24) .|.
(fromIntegral w2 `shiftL` 16) .|.
(fromIntegral w3 `shiftL` 8) .|.
(fromIntegral w4)
in if v < 0x0010FFFF
then (chr v,rest)
else throwDyn (IllegalRepresentation [w1,w2,w3,w4])
decodeUTF32be _ = throwDyn UnexpectedEnd
decodeUTF32le :: [Word8] -> (Char,[Word8])
decodeUTF32le (w1:w2:w3:w4:rest) = let
v = (fromIntegral w4 `shiftL` 24) .|.
(fromIntegral w3 `shiftL` 16) .|.
(fromIntegral w2 `shiftL` 8) .|.
(fromIntegral w1)
in if v < 0x0010FFFF
then (chr v,rest)
else throwDyn (IllegalRepresentation [w1,w2,w3,w4])
decodeUTF32le _ = throwDyn UnexpectedEnd