{-# LANGUAGE DeriveDataTypeable #-}
module Data.Encoding.UTF8 where
import Control.Throws
import Data.Char
import Data.Bits
import Data.Encoding.Base
import Data.Encoding.ByteSource
import Data.Encoding.ByteSink
import Data.Encoding.Exception
import Data.Typeable
data UTF8 = UTF8
| UTF8Strict
deriving (UTF8 -> UTF8 -> Bool
(UTF8 -> UTF8 -> Bool) -> (UTF8 -> UTF8 -> Bool) -> Eq UTF8
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UTF8 -> UTF8 -> Bool
$c/= :: UTF8 -> UTF8 -> Bool
== :: UTF8 -> UTF8 -> Bool
$c== :: UTF8 -> UTF8 -> Bool
Eq,Int -> UTF8 -> ShowS
[UTF8] -> ShowS
UTF8 -> String
(Int -> UTF8 -> ShowS)
-> (UTF8 -> String) -> ([UTF8] -> ShowS) -> Show UTF8
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UTF8] -> ShowS
$cshowList :: [UTF8] -> ShowS
show :: UTF8 -> String
$cshow :: UTF8 -> String
showsPrec :: Int -> UTF8 -> ShowS
$cshowsPrec :: Int -> UTF8 -> ShowS
Show,Typeable)
instance Encoding UTF8 where
encodeChar :: UTF8 -> Char -> m ()
encodeChar UTF8
_ Char
c
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x0000007F = Int -> m ()
p8 Int
n
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x000007FF = do
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0xC0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6)
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0x80 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x0000FFFF = do
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0xE0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
12)
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0x80 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0x80 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)
| Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x0010FFFF = do
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0xF0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
18)
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0x80 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
12) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0x80 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)
Int -> m ()
p8 (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ Int
0x80 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)
| Bool
otherwise = EncodingException -> m ()
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Char -> EncodingException
HasNoRepresentation Char
c)
where
n :: Int
n = Char -> Int
ord Char
c
p8 :: Int -> m ()
p8 = Word8 -> m ()
forall (m :: * -> *). ByteSink m => Word8 -> m ()
pushWord8(Word8 -> m ()) -> (Int -> Word8) -> Int -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
.Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral
encodeable :: UTF8 -> Char -> Bool
encodeable UTF8
_ Char
c = Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\x10FFFF'
decodeChar :: UTF8 -> m Char
decodeChar UTF8
UTF8 = do
Word8
w1 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
case () of
()
_
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F -> Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w1
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xBF -> DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter Word8
w1)
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xDF -> do
Word8
w2 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$
((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x1F) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
w2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F)
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xEF -> do
Word8
w2 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
Word8
w3 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
let v1 :: Word8
v1 = Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x0F
v2 :: Word8
v2 = Word8
w2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v3 :: Word8
v3 = Word8
w3 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$
((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v1) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
12)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v2) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v3)
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xF7 -> do
Word8
w2 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
Word8
w3 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
Word8
w4 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
let v1 :: Word8
v1 = Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x07
v2 :: Word8
v2 = Word8
w2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v3 :: Word8
v3 = Word8
w3 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v4 :: Word8
v4 = Word8
w4 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v :: Int
v = ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v1) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
18)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v2) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
12)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v3) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v4)
if Int
v Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10FFFF
then Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr Int
v
else DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException ([Word8] -> DecodingException
IllegalRepresentation [Word8
w1,Word8
w2,Word8
w3,Word8
w4])
| Bool
otherwise -> DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter Word8
w1)
decodeChar UTF8
UTF8Strict = do
Word8
w1 <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
case () of
()
_
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F -> Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w1
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xBF -> DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter Word8
w1)
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xDF -> do
Word8
w2 <- m Word8
fetchExtend8
let v1 :: Word8
v1 = Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x1F
if Word8
v1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
1
then DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException ([Word8] -> DecodingException
IllegalRepresentation [Word8
w1,Word8
w2])
else Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$
((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v1) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ Word8
w2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F)
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xEF -> do
Word8
w2 <- m Word8
fetchExtend8
Word8
w3 <- m Word8
fetchExtend8
let v1 :: Word8
v1 = Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x0F
v2 :: Word8
v2 = Word8
w2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v3 :: Word8
v3 = Word8
w3 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
if Word8
v1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 Bool -> Bool -> Bool
&& Word8
v2 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
0x20
then DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException ([Word8] -> DecodingException
IllegalRepresentation [Word8
w1,Word8
w2,Word8
w3])
else Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$
((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v1) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
12)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v2) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v3)
| Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xF7 -> do
Word8
w2 <- m Word8
fetchExtend8
Word8
w3 <- m Word8
fetchExtend8
Word8
w4 <- m Word8
fetchExtend8
let v1 :: Word8
v1 = Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x07
v2 :: Word8
v2 = Word8
w2 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v3 :: Word8
v3 = Word8
w3 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v4 :: Word8
v4 = Word8
w4 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3F
v :: Int
v = ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v1) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
18)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v2) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
12)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. ((Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v3) Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6)
Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
v4)
if Word8
v1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 Bool -> Bool -> Bool
&& Word8
v2 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
0x10
then DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException ([Word8] -> DecodingException
IllegalRepresentation [Word8
w1,Word8
w2,Word8
w3,Word8
w4])
else (if Int
v Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0x10FFFF
then Char -> m Char
forall (m :: * -> *) a. Monad m => a -> m a
return (Char -> m Char) -> Char -> m Char
forall a b. (a -> b) -> a -> b
$ Int -> Char
chr Int
v
else DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException ([Word8] -> DecodingException
IllegalRepresentation [Word8
w1,Word8
w2,Word8
w3,Word8
w4]))
| Bool
otherwise -> DecodingException -> m Char
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter Word8
w1)
where
invalidExtend :: a -> Bool
invalidExtend a
wrd = a
wrd a -> a -> a
forall a. Bits a => a -> a -> a
.&. a
0xC0 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
/= a
0x80
fetchExtend8 :: m Word8
fetchExtend8 = do
Word8
w <- m Word8
forall (m :: * -> *). ByteSource m => m Word8
fetchWord8
if Word8 -> Bool
forall a. (Bits a, Num a) => a -> Bool
invalidExtend Word8
w
then DecodingException -> m Word8
forall e (m :: * -> *) a. Throws e m => e -> m a
throwException (Word8 -> DecodingException
IllegalCharacter Word8
w)
else Word8 -> m Word8
forall (m :: * -> *) a. Monad m => a -> m a
return Word8
w