module Data.Encoding.GB18030
(GB18030(..))
where
import Control.Throws
import Data.Char (chr,ord)
import Data.Word
import Data.Bits
import Data.Encoding.Base
import Data.Encoding.ByteSource
import Data.Encoding.ByteSink
import Data.Encoding.Exception
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Typeable
#if __GLASGOW_HASKELL__>=608
import Data.ByteString.Unsafe (unsafeIndex)
#else
import Data.ByteString.Base (unsafeIndex)
#endif
import Data.Encoding.GB18030Data
data GB18030 = GB18030 deriving (Eq,Show,Typeable)
instance Encoding GB18030 where
decodeChar _ = do
w1 <- fetchWord8
case () of
_
| w1 <= 0x80 -> return (chr $ fromIntegral w1)
| w1 <= 0xFE -> do
w2 <- fetchWord8
case () of
_
| w2 < 0x30 -> throwException (IllegalCharacter w2)
| w2 <= 0x39 -> do
w3 <- fetchWord8
case () of
_
| w3 < 0x81 -> throwException (IllegalCharacter w3)
| w3 <= 0xFE -> do
w4 <- fetchWord8
case () of
_
| w4 < 0x30 -> throwException (IllegalCharacter w4)
| w4 <= 0x39 -> decodeGBFour $ linear w1 w2 w3 w4
| otherwise -> throwException (IllegalCharacter w4)
| otherwise -> throwException (IllegalCharacter w3)
| w2 <= 0x7E -> return $ decodeGBTwo $ linear2 w1 w2
| w2 == 0x7F -> throwException (IllegalCharacter w2)
| w2 <= 0xFE -> return $ decodeGBTwo $ linear2 w1 w2
| otherwise -> throwException (IllegalCharacter w2)
| otherwise -> throwException (IllegalCharacter w1)
encodeChar _ ch = if ch<='\x4946'
then (if ch<='\x4055'
then (if ch<='\x2E80'
then (if ch<='\x200F'
then (if ch<'\x0452'
then arr 0x0000 arr1
else range range1)
else (if ch<'\x2643'
then arr 0x2010 arr2
else range range2))
else (if ch<='\x3917'
then (if ch<'\x361B'
then arr 0x2E81 arr3
else range range3)
else (if ch<'\x3CE1'
then arr 0x3918 arr4
else range range4)))
else (if ch<='\x464B'
then (if ch<='\x4336'
then (if ch<'\x4160'
then arr 0x4056 arr5
else range range5)
else (if ch<'\x44D7'
then arr 0x4337 arr6
else range range6))
else (if ch<'\x478E'
then arr 0x464C arr7
else range range7)))
else (if ch<='\xF92B'
then (if ch<='\xD7FF'
then (if ch<='\x4C76'
then (if ch<'\x49B8'
then arr 0x4947 arr8
else range range8)
else (if ch<'\x9FA6'
then arr 0x4C77 arr9
else range range9))
else (if ch<'\xE865'
then arr 0xD800 arr10
else range range10))
else (if ch<='\xFFFF'
then (if ch<='\xFE2F'
then (if ch<'\xFA2A'
then arr 0xF92C arr11
else range range11)
else (if ch<'\xFFE6'
then arr 0xFE30 arr12
else range range12))
else (if ch<='\x10FFFF'
then range range13
else throwException (HasNoRepresentation ch))))
where
range r = let (w1,w2,w3,w4) = delinear (ord ch + r)
in pushWord8 w1 >> pushWord8 w2 >> pushWord8 w3 >> pushWord8 w4
arr off a = let ind = (ord ch off)*5
w1 = unsafeIndex a (ind+1)
w2 = unsafeIndex a (ind+2)
w3 = unsafeIndex a (ind+3)
w4 = unsafeIndex a (ind+4)
in do
pushWord8 w1
case unsafeIndex a ind of
1 -> return ()
2 -> pushWord8 w2
3 -> pushWord8 w2 >> pushWord8 w3
4 -> pushWord8 w2 >> pushWord8 w3 >> pushWord8 w4
encodeable _ c = c <= '\x10FFFF'
linear :: Word8 -> Word8 -> Word8 -> Word8 -> Int
linear w1 w2 w3 w4
= (fromIntegral (w40x30))
+ (fromIntegral (w30x81))*10
+ (fromIntegral (w20x30))*1260
+ (fromIntegral (w10x81))*12600
linear2 :: Word8 -> Word8 -> Int
linear2 w1 w2 = (fromIntegral (w2 (if w2<=0x7E
then 0x40
else 0x41)))
+ (fromIntegral (w10x81))*190
delinear :: Int -> (Word8,Word8,Word8,Word8)
delinear n = let
(w1,n1) = n `divMod` 12600
(w2,n2) = n1 `divMod` 1260
(w3,n3) = n2 `divMod` 10
w4 = n3
in (fromIntegral w1+0x81
,fromIntegral w2+0x30
,fromIntegral w3+0x81
,fromIntegral w4+0x30)
decodeGBTwo :: Int -> Char
decodeGBTwo n = let
rn = n*2
w1 = unsafeIndex rrarr rn
w2 = unsafeIndex rrarr (rn+1)
in chr $ ((fromIntegral w1) `shiftL` 8) .|. (fromIntegral w2)
decodeGBFour :: ByteSource m => Int -> m Char
decodeGBFour v = if v<=17858
then (if v<=15582
then (if v<=11328
then (if v<=7921
then (if v<820
then arr 0 rarr1
else range range1)
else (if v<9219
then arr 7922 rarr2
else range range2))
else (if v<=13737
then (if v<12973
then arr 11329 rarr3
else range range3)
else (if v<14698
then arr 13738 rarr4
else range range4)))
else (if v<=17101
then (if v<=16317
then (if v<15847
then arr 15583 rarr5
else range range5)
else (if v<16729
then arr 16318 rarr6
else range range6))
else (if v<17418
then arr 17102 rarr7
else range range7)))
else (if v<=37844
then (if v<=33468
then (if v<=18663
then (if v<17961
then arr 17859 rarr8
else range range8)
else (if v<19043
then arr 18664 rarr9
else range range9))
else (if v<33550
then arr 33469 rarr10
else range range10))
else (if v<=39419
then (if v<=39107
then (if v<38078
then arr 37845 rarr11
else range range11)
else (if v<39394
then arr 39108 rarr12
else range range12))
else (if v<=1237575 && v>=189000
then range range13
else throwException OutOfRange)))
where
arr off a = let
v' = (voff)*2
w1 = unsafeIndex a v'
w2 = unsafeIndex a (v'+1)
in return $ chr $ ((fromIntegral w1) `shiftL` 8)
.|. (fromIntegral w2)
range r = return $ chr (vr)
range1,range2,range3,range4,range5,range6,range7,range8,range9,range10,range11,range12,range13 :: Int
range1 = 286
range2 = 576
range3 = 878
range4 = 887
range5 = 889
range6 = 894
range7 = 900
range8 = 911
range9 = 21827
range10 = 25943
range11 = 25964
range12 = 26116
range13 = 123464