{-# LANGUAGE Trustworthy #-}
module Web.Willow.Common.Encoding.GB
( decoder
, encoderGb18030
, encoderGbk
) where
import qualified Control.Applicative as A
import qualified Data.ByteString as BS
import qualified Data.ByteString.Short as BS.SH
import qualified Data.Char as C
import qualified Data.Maybe as Y
import qualified Data.Tuple as U
import qualified Data.Vector as V
import qualified Data.Word as W
import Control.Applicative ( (<|>) )
import Web.Willow.Common.Encoding.Common
import Web.Willow.Common.Parser
import Web.Willow.Common.Parser.Switch
decoder :: TextBuilder
decoder :: TextBuilder
decoder = StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> TextBuilder) -> TextBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)]
-> Word8 -> TextBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Word8 -> Bool
isAsciiByte Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
toUnicode1
, (Word8 -> Bool)
-> TextBuilder
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> m out -> SwitchCase test m out
If_ (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x80) (TextBuilder
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> TextBuilder
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ [Word8] -> Char -> TextBuilder
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
0x80] Char
'\x20AC'
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0xFE) ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ \Word8
first -> Word8 -> TextBuilder
decoderFourBytes Word8
first TextBuilder -> TextBuilder -> TextBuilder
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Word8 -> TextBuilder
decoderTwoBytes Word8
first TextBuilder -> TextBuilder -> TextBuilder
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> do
ByteString
bs <- StateT (Confidence, ()) (Parser ByteString) ByteString
-> StateT (Confidence, ()) (Parser ByteString) ByteString
forall (m :: * -> *) stream token out.
MonadParser m stream token =>
m out -> m out
lookAhead (StateT (Confidence, ()) (Parser ByteString) ByteString
-> StateT (Confidence, ()) (Parser ByteString) ByteString)
-> StateT (Confidence, ()) (Parser ByteString) ByteString
-> StateT (Confidence, ()) (Parser ByteString) ByteString
forall a b. (a -> b) -> a -> b
$ Word -> StateT (Confidence, ()) (Parser ByteString) ByteString
forall (m :: * -> *) stream token.
MonadParser m stream token =>
Word -> m stream
nextChunk Word
3
if ByteString -> Int
BS.length ByteString
bs Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
3
then TextBuilder
forall (f :: * -> *) a. Alternative f => f a
A.empty
else Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
decoderFailure1 Word8
first
, (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
decoderFailure1
]
decoderTwoBytes :: W.Word8 -> TextBuilder
decoderTwoBytes :: Word8 -> TextBuilder
decoderTwoBytes Word8
first = StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> TextBuilder) -> TextBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)]
-> Word8 -> TextBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0x40 Word8
0x7E) ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> TextBuilder
forall state.
Word8
-> Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
decoderTwoBytes' Word8
0x40
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0x80 Word8
0xFE) ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> TextBuilder
forall state.
Word8
-> Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
decoderTwoBytes' Word8
0x41
, (Word8 -> Bool)
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xFF) ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ Word8 -> Word8 -> TextBuilder
forall state.
Word8
-> Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
decoderFailure2 Word8
first
, (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else ((Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String))
-> (Word8 -> TextBuilder)
-> SwitchCase
Word8
(StateT (Confidence, ()) (Parser ByteString))
(DecoderError String)
forall a b. (a -> b) -> a -> b
$ \Word8
b -> Word8 -> StateT (Confidence, ()) (Parser ByteString) ()
forall (m :: * -> *) stream token.
MonadParser m stream token =>
token -> m ()
push Word8
b StateT (Confidence, ()) (Parser ByteString) ()
-> TextBuilder -> TextBuilder
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Word8 -> TextBuilder
forall state. Word8 -> StateTextBuilder state
decoderFailure1 Word8
first
]
where decoderTwoBytes' :: Word8
-> Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
decoderTwoBytes' Word8
offset Word8
second = StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
-> (Char
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String))
-> Maybe Char
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall state. Word8 -> StateTextBuilder state
recovery Word8
second) ([Word8]
-> Char
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
first, Word8
second]) (Maybe Char
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String))
-> (Word -> Maybe Char)
-> Word
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
Word -> Maybe Char
decodeIndex (Word
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String))
-> Word
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall a b. (a -> b) -> a -> b
$ (Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
first Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x81) Word -> Word -> Word
forall a. Num a => a -> a -> a
* Word
190 Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
second Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
offset)
recovery :: Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
recovery Word8
second
| Word8 -> Bool
isAsciiByte Word8
second = Word8 -> StateT (Confidence, state) (Parser ByteString) ()
forall (m :: * -> *) stream token.
MonadParser m stream token =>
token -> m ()
push Word8
second StateT (Confidence, state) (Parser ByteString) ()
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Word8
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall state. Word8 -> StateTextBuilder state
decoderFailure1 Word8
first
| Bool
otherwise = [Word8]
-> StateT
(Confidence, state) (Parser ByteString) (DecoderError String)
forall state. [Word8] -> StateTextBuilder state
decoderFailure [Word8
first, Word8
second]
decoderFourBytes :: W.Word8 -> TextBuilder
decoderFourBytes :: Word8 -> TextBuilder
decoderFourBytes Word8
first = do
Word8
second <- StateT (Confidence, ()) (Parser ByteString) Word8
-> StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token out.
MonadParser m stream token =>
m out -> m out
lookAhead StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> StateT (Confidence, ()) (Parser ByteString) Word8)
-> StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Word8 -> Bool)
-> Word8 -> StateT (Confidence, ()) (Parser ByteString) Word8
forall (trans :: * -> *) stream token out.
MonadParser trans stream token =>
(out -> Bool) -> out -> trans out
satisfying (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0x30 Word8
0x39)
Maybe (Word8, Word8)
trail <- StateT (Confidence, ()) (Parser ByteString) (Word8, Word8)
-> StateT
(Confidence, ()) (Parser ByteString) (Maybe (Word8, Word8))
forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
A.optional (StateT (Confidence, ()) (Parser ByteString) (Word8, Word8)
-> StateT
(Confidence, ()) (Parser ByteString) (Maybe (Word8, Word8)))
-> StateT (Confidence, ()) (Parser ByteString) (Word8, Word8)
-> StateT
(Confidence, ()) (Parser ByteString) (Maybe (Word8, Word8))
forall a b. (a -> b) -> a -> b
$ do
Word8
_ <- StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next
Word8
third <- StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> StateT (Confidence, ()) (Parser ByteString) Word8)
-> StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Word8 -> Bool)
-> Word8 -> StateT (Confidence, ()) (Parser ByteString) Word8
forall (trans :: * -> *) stream token out.
MonadParser trans stream token =>
(out -> Bool) -> out -> trans out
satisfying (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0x81 Word8
0xFE)
Word8
fourth <- StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT (Confidence, ()) (Parser ByteString) Word8
-> (Word8 -> StateT (Confidence, ()) (Parser ByteString) Word8)
-> StateT (Confidence, ()) (Parser ByteString) Word8
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Word8 -> Bool)
-> Word8 -> StateT (Confidence, ()) (Parser ByteString) Word8
forall (trans :: * -> *) stream token out.
MonadParser trans stream token =>
(out -> Bool) -> out -> trans out
satisfying (Word8 -> Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> a -> Bool
range Word8
0x30 Word8
0x39)
(Word8, Word8)
-> StateT (Confidence, ()) (Parser ByteString) (Word8, Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Word8
third, Word8
fourth)
Word8 -> Maybe (Word8, Word8) -> TextBuilder
forall state.
Word8 -> Maybe (Word8, Word8) -> StateTextBuilder state
decoderFourBytes' Word8
second Maybe (Word8, Word8)
trail
where decoderFourBytes' :: Word8 -> Maybe (Word8, Word8) -> StateTextBuilder state
decoderFourBytes' Word8
second (Just (Word8
third, Word8
fourth)) = StateTextBuilder state
-> (Char -> StateTextBuilder state)
-> Maybe Char
-> StateTextBuilder state
forall b a. b -> (a -> b) -> Maybe a -> b
maybe
([Word8] -> StateTextBuilder state
forall state. [Word8] -> StateTextBuilder state
decoderFailure [Word8
first, Word8
second, Word8
third, Word8
fourth])
([Word8] -> Char -> StateTextBuilder state
forall state. [Word8] -> Char -> StateTextBuilder state
emit [Word8
first, Word8
second, Word8
third, Word8
fourth]) (Maybe Char -> StateTextBuilder state)
-> (Word -> Maybe Char) -> Word -> StateTextBuilder state
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
Word -> Maybe Char
decodeRange (Word -> StateTextBuilder state) -> Word -> StateTextBuilder state
forall a b. (a -> b) -> a -> b
$
(Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
first Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x81) Word -> Word -> Word
forall a. Num a => a -> a -> a
* Word
12600
Word -> Word -> Word
forall a. Num a => a -> a -> a
+ (Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
second Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x30) Word -> Word -> Word
forall a. Num a => a -> a -> a
* Word
1260
Word -> Word -> Word
forall a. Num a => a -> a -> a
+ (Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
third Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x81) Word -> Word -> Word
forall a. Num a => a -> a -> a
* Word
10
Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
fourth Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
0x30
decoderFourBytes' Word8
_ Maybe (Word8, Word8)
Nothing = Word8 -> StateTextBuilder state
forall state. Word8 -> StateTextBuilder state
decoderFailure1 Word8
first
encoderGb18030 :: BinaryBuilder
encoderGb18030 :: BinaryBuilder
encoderGb18030 = Bool -> BinaryBuilder
encoder Bool
False
encoderGbk :: BinaryBuilder
encoderGbk :: BinaryBuilder
encoderGbk = Bool -> BinaryBuilder
encoder Bool
True
encoder :: Bool -> BinaryBuilder
encoder :: Bool -> BinaryBuilder
encoder Bool
isGbk = StateT () (Parser Text) Char
forall (m :: * -> *) stream token.
MonadParser m stream token =>
m token
next StateT () (Parser Text) Char
-> (Char -> BinaryBuilder) -> BinaryBuilder
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)]
-> Char -> BinaryBuilder
forall (m :: * -> *) test out.
Alternative m =>
[SwitchCase test m out] -> test -> m out
switch
[ (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If Char -> Bool
C.isAscii Char -> BinaryBuilder
forall state. Char -> StateBinaryBuilder state
fromAscii
, (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\xE5E5') Char -> BinaryBuilder
forall state out. Char -> StateEncoder state (EncoderError out)
encoderFailure
, (Char -> Bool)
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> m out -> SwitchCase test m out
If_ (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\x20AC' Bool -> Bool -> Bool
&& Bool
isGbk) (BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> BinaryBuilder
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$ EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> EncoderError ShortByteString)
-> ShortByteString -> EncoderError ShortByteString
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack [Word8
0x80])
, (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Maybe Word -> Bool
forall a. Maybe a -> Bool
Y.isJust (Maybe Word -> Bool) -> (Char -> Maybe Word) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Maybe Word
encodeIndex) ((Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$ \Char
c ->
let (Word
lead', Word
trail) = Word -> Word -> (Word, Word)
forall a. Integral a => a -> a -> (a, a)
divMod (Maybe Word -> Word
forall a. HasCallStack => Maybe a -> a
Y.fromJust (Maybe Word -> Word) -> Maybe Word -> Word
forall a b. (a -> b) -> a -> b
$ Char -> Maybe Word
encodeIndex Char
c) Word
190
lead :: Word8
lead = Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
lead' Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x81
in EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (EncoderError ShortByteString -> BinaryBuilder)
-> ([Word8] -> EncoderError ShortByteString)
-> [Word8]
-> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> EncoderError ShortByteString)
-> ([Word8] -> ShortByteString)
-> [Word8]
-> EncoderError ShortByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word8] -> ShortByteString
BS.SH.pack ([Word8] -> BinaryBuilder) -> [Word8] -> BinaryBuilder
forall a b. (a -> b) -> a -> b
$ if Word
trail Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
0x3F
then [Word8
lead, Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
trail Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x40]
else [Word8
lead, Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
trail Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x41]
, (Char -> Bool)
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> Bool) -> (test -> m out) -> SwitchCase test m out
If (Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
isGbk) Char -> BinaryBuilder
forall state out. Char -> StateEncoder state (EncoderError out)
encoderFailure
, (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall test (m :: * -> *) out.
(test -> m out) -> SwitchCase test m out
Else ((Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString))
-> (Char -> BinaryBuilder)
-> SwitchCase
Char (StateT () (Parser Text)) (EncoderError ShortByteString)
forall a b. (a -> b) -> a -> b
$ \Char
c ->
let (Word
b1, Word
p1) = Word -> Word -> (Word, Word)
forall a. Integral a => a -> a -> (a, a)
divMod (Char -> Word
encodeRange Char
c) Word
12600
(Word
b2, Word
p2) = Word -> Word -> (Word, Word)
forall a. Integral a => a -> a -> (a, a)
divMod Word
p1 Word
1260
(Word
b3, Word
b4) = Word -> Word -> (Word, Word)
forall a. Integral a => a -> a -> (a, a)
divMod Word
p2 Word
10
in EncoderError ShortByteString -> BinaryBuilder
forall (m :: * -> *) a. Monad m => a -> m a
return (EncoderError ShortByteString -> BinaryBuilder)
-> (ShortByteString -> EncoderError ShortByteString)
-> ShortByteString
-> BinaryBuilder
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShortByteString -> EncoderError ShortByteString
forall (f :: * -> *) a. Applicative f => a -> f a
pure (ShortByteString -> BinaryBuilder)
-> ShortByteString -> BinaryBuilder
forall a b. (a -> b) -> a -> b
$ [Word8] -> ShortByteString
BS.SH.pack
[ Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
b1 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x81
, Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
b2 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x30
, Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
b3 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x81
, Word -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word
b4 Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
0x30
]
]
decodeIndex :: Word -> Maybe Char
decodeIndex :: Word -> Maybe Char
decodeIndex Word
index = IO (MemoizationTable Word (Maybe Char))
-> Maybe (IO (MemoizationTable Char (Maybe Word)))
-> Word
-> (Word -> Maybe Char)
-> Maybe Char
forall k v.
(Eq k, Hashable k, Eq v, Hashable v) =>
IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> k
-> (k -> Maybe v)
-> Maybe v
lookupMemoizedIndex IO (MemoizationTable Word (Maybe Char))
decodeIndexM (IO (MemoizationTable Char (Maybe Word))
-> Maybe (IO (MemoizationTable Char (Maybe Word)))
forall a. a -> Maybe a
Just IO (MemoizationTable Char (Maybe Word))
encodeIndexM) Word
index Word -> Maybe Char
readDecodeIndex
decodeIndexM :: DecoderMemoTable
decodeIndexM :: IO (MemoizationTable Word (Maybe Char))
decodeIndexM = IO (MemoizationTable Word (Maybe Char))
forall k v. IO (MemoizationTable k v)
newMemoizationTable
{-# NOINLINE decodeIndexM #-}
readDecodeIndex :: Word -> Maybe Char
readDecodeIndex :: Word -> Maybe Char
readDecodeIndex Word
index = Word -> [(Word, Char)] -> Maybe Char
forall k v. Ord k => k -> [(k, v)] -> Maybe v
search Word
index ([(Word, Char)] -> Maybe Char) -> [(Word, Char)] -> Maybe Char
forall a b. (a -> b) -> a -> b
$ String -> [(Word, Char)]
loadIndex String
"gb18030"
encodeIndex :: Char -> Maybe Word
encodeIndex :: Char -> Maybe Word
encodeIndex Char
char = IO (MemoizationTable Char (Maybe Word))
-> Maybe (IO (MemoizationTable Word (Maybe Char)))
-> Char
-> (Char -> Maybe Word)
-> Maybe Word
forall k v.
(Eq k, Hashable k, Eq v, Hashable v) =>
IO (MemoizationTable k (Maybe v))
-> Maybe (IO (MemoizationTable v (Maybe k)))
-> k
-> (k -> Maybe v)
-> Maybe v
lookupMemoizedIndex IO (MemoizationTable Char (Maybe Word))
encodeIndexM (IO (MemoizationTable Word (Maybe Char))
-> Maybe (IO (MemoizationTable Word (Maybe Char)))
forall a. a -> Maybe a
Just IO (MemoizationTable Word (Maybe Char))
decodeIndexM) Char
char Char -> Maybe Word
readEncodeIndex
encodeIndexM :: EncoderMemoTable
encodeIndexM :: IO (MemoizationTable Char (Maybe Word))
encodeIndexM = IO (MemoizationTable Char (Maybe Word))
forall k v. IO (MemoizationTable k v)
newMemoizationTable
{-# NOINLINE encodeIndexM #-}
readEncodeIndex :: Char -> Maybe Word
readEncodeIndex :: Char -> Maybe Word
readEncodeIndex Char
char = Maybe Word
i1 Maybe Word -> Maybe Word -> Maybe Word
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> [(Char, Word)] -> Maybe Word
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Char
char [(Char, Word)]
is2
where ([(Char, Word)]
is1, [(Char, Word)]
is2) = Int -> [(Char, Word)] -> ([(Char, Word)], [(Char, Word)])
forall a. Int -> [a] -> ([a], [a])
splitAt Int
6176 ([(Char, Word)] -> ([(Char, Word)], [(Char, Word)]))
-> ([(Word, Char)] -> [(Char, Word)])
-> [(Word, Char)]
-> ([(Char, Word)], [(Char, Word)])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Word, Char) -> (Char, Word)) -> [(Word, Char)] -> [(Char, Word)]
forall a b. (a -> b) -> [a] -> [b]
map (Word, Char) -> (Char, Word)
forall a b. (a, b) -> (b, a)
U.swap ([(Word, Char)] -> ([(Char, Word)], [(Char, Word)]))
-> [(Word, Char)] -> ([(Char, Word)], [(Char, Word)])
forall a b. (a -> b) -> a -> b
$ String -> [(Word, Char)]
loadIndex String
"gb18030"
i1 :: Maybe Word
i1 = if Char
char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'\x4E02' Bool -> Bool -> Bool
&& Char
char Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'\xE525'
then Char -> [(Char, Word)] -> Maybe Word
forall k v. Ord k => k -> [(k, v)] -> Maybe v
search Char
char [(Char, Word)]
is1
else Maybe Word
forall a. Maybe a
Nothing
decodeRange :: Word -> Maybe Char
decodeRange :: Word -> Maybe Char
decodeRange Word
index
| Word
index Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
> Word
39419 Bool -> Bool -> Bool
&& Word
index Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
189000 = Maybe Char
forall a. Maybe a
Nothing
| Word
index Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
> Word
1237575 = Maybe Char
forall a. Maybe a
Nothing
| Word
index Word -> Word -> Bool
forall a. Eq a => a -> a -> Bool
== Word
7457 = Char -> Maybe Char
forall a. a -> Maybe a
Just Char
'\xE7C7'
| Bool
otherwise = Char -> Maybe Char
forall a. a -> Maybe a
Just (Char -> Maybe Char) -> (Word -> Char) -> Word -> Maybe Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Char) -> (Word -> Int) -> Word -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word -> Maybe Char) -> Word -> Maybe Char
forall a b. (a -> b) -> a -> b
$ Word
co Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Word
index Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
po
where (Word
po, Word
co) = ((Word, Char) -> Bool) -> (Word, Word)
findRange (((Word, Char) -> Bool) -> (Word, Word))
-> ((Word, Char) -> Bool) -> (Word, Word)
forall a b. (a -> b) -> a -> b
$ \(Word
i, Char
_) -> Word
i Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
<= Word
index
encodeRange :: Char -> Word
encodeRange :: Char -> Word
encodeRange Char
'\xE7C7' = Word
7457
encodeRange Char
char = Word
po Word -> Word -> Word
forall a. Num a => a -> a -> a
+ Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
char) Word -> Word -> Word
forall a. Num a => a -> a -> a
- Word
co
where (Word
po, Word
co) = ((Word, Char) -> Bool) -> (Word, Word)
findRange (((Word, Char) -> Bool) -> (Word, Word))
-> ((Word, Char) -> Bool) -> (Word, Word)
forall a b. (a -> b) -> a -> b
$ \(Word
_, Char
c) -> Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
char
findRange :: ((Word, Char) -> Bool) -> (Word, Word)
findRange :: ((Word, Char) -> Bool) -> (Word, Word)
findRange (Word, Char) -> Bool
f = (Char -> Word) -> (Word, Char) -> (Word, Word)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Int -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word) -> (Char -> Int) -> Char -> Word
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
forall a. Enum a => a -> Int
fromEnum) ((Word, Char) -> (Word, Word))
-> (Maybe (Word, Char) -> (Word, Char))
-> Maybe (Word, Char)
-> (Word, Word)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word, Char) -> Maybe (Word, Char) -> (Word, Char)
forall a. a -> Maybe a -> a
Y.fromMaybe (Word
0, Char
'\NUL') (Maybe (Word, Char) -> (Word, Word))
-> Maybe (Word, Char) -> (Word, Word)
forall a b. (a -> b) -> a -> b
$ ((Word, Char) -> Bool) -> Vector (Word, Char) -> Maybe (Word, Char)
forall a. (a -> Bool) -> Vector a -> Maybe a
V.find (Word, Char) -> Bool
f Vector (Word, Char)
gbRanges
gbRanges :: V.Vector (Word, Char)
gbRanges :: Vector (Word, Char)
gbRanges = [(Word, Char)] -> Vector (Word, Char)
forall a. [a] -> Vector a
V.fromList ([(Word, Char)] -> Vector (Word, Char))
-> ([(Word, Char)] -> [(Word, Char)])
-> [(Word, Char)]
-> Vector (Word, Char)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Word, Char)] -> [(Word, Char)]
forall a. [a] -> [a]
reverse ([(Word, Char)] -> Vector (Word, Char))
-> [(Word, Char)] -> Vector (Word, Char)
forall a b. (a -> b) -> a -> b
$ String -> [(Word, Char)]
loadIndex String
"gb18030-ranges"