{-# LANGUAGE BangPatterns
, DataKinds
, KindSignatures
, UnboxedTuples #-}
module Parser.Lathe.Encoding.UTF16
(
utf16BOM
, UTF16Unit (..)
, UTF16Point (..)
, fromUtf16
, UTF16Branch (..)
, unitUtf16BE
, unitUtf16LE
, contUtf16BE_2
, contUtf16LE_2
, skipUtf16BE_2
, skipUtf16LE_2
) where
import Parser.Lathe.Binary.Internal
import Parser.Lathe.Internal
import Data.Bits
import Data.ByteString (ByteString)
import qualified Data.ByteString.Unsafe as B
import Data.Word
import GHC.Base (unsafeChr)
import GHC.ByteOrder
import GHC.TypeNats (Nat)
{-# INLINE utf16BOM #-}
utf16BOM
:: e
-> e
-> Parser e ByteOrder
utf16BOM :: forall e. e -> e -> Parser e ByteOrder
utf16BOM e
malformed = Int
-> (ByteString -> (# Res e ByteOrder #)) -> e -> Parser e ByteOrder
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 (e -> ByteString -> (# Res e ByteOrder #)
forall e. e -> ByteString -> (# Res e ByteOrder #)
convUTF16BOM e
malformed)
convUTF16BOM :: e -> ByteString -> (# Res e ByteOrder #)
convUTF16BOM :: forall e. e -> ByteString -> (# Res e ByteOrder #)
convUTF16BOM e
e = \ByteString
b ->
let w0 :: Word8
w0 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
0
w1 :: Word8
w1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
1
in Word8 -> Word8 -> (# Res e ByteOrder #)
forall {a} {a}.
(Eq a, Eq a, Num a, Num a) =>
a -> a -> (# Res e ByteOrder #)
go Word8
w0 Word8
w1
where
go :: a -> a -> (# Res e ByteOrder #)
go a
w0 a
w1
| a
w0 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFE, a
w1 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFF = (# ByteOrder -> Res e ByteOrder
forall a e. a -> Res e a
Yes ByteOrder
BigEndian #)
| a
w0 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFF, a
w1 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFE = (# ByteOrder -> Res e ByteOrder
forall a e. a -> Res e a
Yes ByteOrder
LittleEndian #)
| Bool
otherwise = (# e -> Res e ByteOrder
forall e a. e -> Res e a
No e
e #)
data UTF16Error =
UTF16BadHigh {-# UNPACK #-} !Word16
| UTF16BadLow {-# UNPACK #-} !Word16 {-# UNPACK #-} !Word16
deriving Int -> UTF16Error -> ShowS
[UTF16Error] -> ShowS
UTF16Error -> String
(Int -> UTF16Error -> ShowS)
-> (UTF16Error -> String)
-> ([UTF16Error] -> ShowS)
-> Show UTF16Error
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UTF16Error -> ShowS
showsPrec :: Int -> UTF16Error -> ShowS
$cshow :: UTF16Error -> String
show :: UTF16Error -> String
$cshowList :: [UTF16Error] -> ShowS
showList :: [UTF16Error] -> ShowS
Show
newtype UTF16Unit (byteOrder :: ByteOrder) (n :: Nat) = UTF16Unit Word16
newtype UTF16Point (n :: Nat) = UTF16Point Word32
fromUtf16 :: UTF16Point n -> Char
fromUtf16 :: forall (n :: Nat). UTF16Point n -> Char
fromUtf16 (UTF16Point Word32
u0) = Int -> Char
unsafeChr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
u0
{-# INLINE castUTF16 #-}
castUTF16 :: Word16 -> Word16 -> (# Res e Char #)
castUTF16 :: forall e. Word16 -> Word16 -> (# Res e Char #)
castUTF16 Word16
u0 Word16
u1 =
let !c :: Char
c = Int -> Char
unsafeChr (Int -> Char) -> (Word32 -> Int) -> Word32 -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral :: Word32 -> Int) (Word32 -> Char) -> Word32 -> Char
forall a b. (a -> b) -> a -> b
$
Word32
0x000010000
Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ (Word16 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
u0 Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- Word16
0xD800) Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
`unsafeShiftL` Int
10)
Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word16 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word16
u1 Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
- Word16
0xDC00)
in (# Char -> Res e Char
forall a e. a -> Res e a
Yes Char
c #)
data UTF16Branch (byteOrder :: ByteOrder) = UTF16_1 {-# UNPACK #-} !(UTF16Point 1)
| UTF16_2 {-# UNPACK #-} !(UTF16Unit byteOrder 2)
{-# INLINE unitUtf16BE #-}
unitUtf16BE
:: e
-> e
-> Parser e (UTF16Branch 'BigEndian)
unitUtf16BE :: forall e. e -> e -> Parser e (UTF16Branch 'BigEndian)
unitUtf16BE e
e = Int
-> (ByteString -> (# Res e (UTF16Branch 'BigEndian) #))
-> e
-> Parser e (UTF16Branch 'BigEndian)
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 (e -> ByteString -> (# Res e (UTF16Branch 'BigEndian) #)
forall e. e -> ByteString -> (# Res e (UTF16Branch 'BigEndian) #)
u16BE e
e)
u16BE :: e -> ByteString -> (# Res e (UTF16Branch 'BigEndian) #)
u16BE :: forall e. e -> ByteString -> (# Res e (UTF16Branch 'BigEndian) #)
u16BE e
e = \ByteString
b ->
let u0 :: Word16
u0 = ByteString -> Word16
w16BE ByteString
b
in if (Word16
u0 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0xF800) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0xD800
then if Word16
u0 Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xDC00
then (# e -> Res e (UTF16Branch 'BigEndian)
forall e a. e -> Res e a
No e
e #)
else let !r :: UTF16Branch byteOrder
r = UTF16Unit byteOrder 2 -> UTF16Branch byteOrder
forall (byteOrder :: ByteOrder).
UTF16Unit byteOrder 2 -> UTF16Branch byteOrder
UTF16_2 (Word16 -> UTF16Unit byteOrder 2
forall (byteOrder :: ByteOrder) (n :: Nat).
Word16 -> UTF16Unit byteOrder n
UTF16Unit Word16
u0)
in (# UTF16Branch 'BigEndian -> Res e (UTF16Branch 'BigEndian)
forall a e. a -> Res e a
Yes UTF16Branch 'BigEndian
forall {byteOrder :: ByteOrder}. UTF16Branch byteOrder
r #)
else let !r :: UTF16Branch byteOrder
r = UTF16Point 1 -> UTF16Branch byteOrder
forall (byteOrder :: ByteOrder).
UTF16Point 1 -> UTF16Branch byteOrder
UTF16_1 (Word32 -> UTF16Point 1
forall (n :: Nat). Word32 -> UTF16Point n
UTF16Point (Word16 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
u0))
in (# UTF16Branch 'BigEndian -> Res e (UTF16Branch 'BigEndian)
forall a e. a -> Res e a
Yes UTF16Branch 'BigEndian
forall {byteOrder :: ByteOrder}. UTF16Branch byteOrder
r #)
{-# INLINE contUtf16BE_2 #-}
contUtf16BE_2
:: e
-> e
-> UTF16Unit 'BigEndian 2
-> Parser e Char
contUtf16BE_2 :: forall e. e -> e -> UTF16Unit 'BigEndian 2 -> Parser e Char
contUtf16BE_2 e
u16err e
end (UTF16Unit Word16
u0) = Int -> (ByteString -> (# Res e Char #)) -> e -> Parser e Char
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 (e -> Word16 -> ByteString -> (# Res e Char #)
forall e. e -> Word16 -> ByteString -> (# Res e Char #)
convUTF16BE_2 e
u16err Word16
u0) e
end
{-# INLINE skipUtf16BE_2 #-}
skipUtf16BE_2
:: e
-> e
-> UTF16Unit 'BigEndian 2
-> Parser e ()
skipUtf16BE_2 :: forall e. e -> e -> UTF16Unit 'BigEndian 2 -> Parser e ()
skipUtf16BE_2 e
u16err e
end (UTF16Unit Word16
u0) = Int -> (ByteString -> (# Res e () #)) -> e -> Parser e ()
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 (e -> Word16 -> ByteString -> (# Res e () #)
forall e. e -> Word16 -> ByteString -> (# Res e () #)
convUTF16BE_2_ e
u16err Word16
u0) e
end
convUTF16BE_2 :: e -> Word16 -> ByteString -> (# Res e Char #)
convUTF16BE_2 :: forall e. e -> Word16 -> ByteString -> (# Res e Char #)
convUTF16BE_2 e
e Word16
u = (Word16 -> Word16 -> (# Res e Char #))
-> e -> Word16 -> ByteString -> (# Res e Char #)
forall e a.
(Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16BE_2__ Word16 -> Word16 -> (# Res e Char #)
forall e. Word16 -> Word16 -> (# Res e Char #)
castUTF16 e
e Word16
u
convUTF16BE_2_ :: e -> Word16 -> ByteString -> (# Res e () #)
convUTF16BE_2_ :: forall e. e -> Word16 -> ByteString -> (# Res e () #)
convUTF16BE_2_ e
e Word16
u = (Word16 -> Word16 -> (# Res e () #))
-> e -> Word16 -> ByteString -> (# Res e () #)
forall e a.
(Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16BE_2__ (\Word16
_ Word16
_ -> (# () -> Res e ()
forall a e. a -> Res e a
Yes () #)) e
e Word16
u
{-# INLINE convUTF16BE_2__ #-}
convUTF16BE_2__
:: (Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16BE_2__ :: forall e a.
(Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16BE_2__ Word16 -> Word16 -> (# Res e a #)
f e
e Word16
u0 = \ByteString
b ->
let u1 :: Word16
u1 = ByteString -> Word16
w16BE ByteString
b
in if (Word16
u1 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0xFC00) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0xDC00
then Word16 -> Word16 -> (# Res e a #)
f Word16
u0 Word16
u1
else (# e -> Res e a
forall e a. e -> Res e a
No e
e #)
{-# INLINE unitUtf16LE #-}
unitUtf16LE
:: e
-> e
-> Parser e (UTF16Branch 'LittleEndian)
unitUtf16LE :: forall e. e -> e -> Parser e (UTF16Branch 'LittleEndian)
unitUtf16LE e
e = Int
-> (ByteString -> (# Res e (UTF16Branch 'LittleEndian) #))
-> e
-> Parser e (UTF16Branch 'LittleEndian)
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 (e -> ByteString -> (# Res e (UTF16Branch 'LittleEndian) #)
forall e.
e -> ByteString -> (# Res e (UTF16Branch 'LittleEndian) #)
u16LE e
e)
u16LE :: e -> ByteString -> (# Res e (UTF16Branch 'LittleEndian) #)
u16LE :: forall e.
e -> ByteString -> (# Res e (UTF16Branch 'LittleEndian) #)
u16LE e
e = \ByteString
b ->
let u0 :: Word16
u0 = ByteString -> Word16
w16LE ByteString
b
in if (Word16
u0 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0xF800) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0xD800
then if Word16
u0 Word16 -> Word16 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word16
0xDC00
then (# e -> Res e (UTF16Branch 'LittleEndian)
forall e a. e -> Res e a
No e
e #)
else let !r :: UTF16Branch byteOrder
r = UTF16Unit byteOrder 2 -> UTF16Branch byteOrder
forall (byteOrder :: ByteOrder).
UTF16Unit byteOrder 2 -> UTF16Branch byteOrder
UTF16_2 (Word16 -> UTF16Unit byteOrder 2
forall (byteOrder :: ByteOrder) (n :: Nat).
Word16 -> UTF16Unit byteOrder n
UTF16Unit Word16
u0)
in (# UTF16Branch 'LittleEndian -> Res e (UTF16Branch 'LittleEndian)
forall a e. a -> Res e a
Yes UTF16Branch 'LittleEndian
forall {byteOrder :: ByteOrder}. UTF16Branch byteOrder
r #)
else let !r :: UTF16Branch byteOrder
r = UTF16Point 1 -> UTF16Branch byteOrder
forall (byteOrder :: ByteOrder).
UTF16Point 1 -> UTF16Branch byteOrder
UTF16_1 (Word32 -> UTF16Point 1
forall (n :: Nat). Word32 -> UTF16Point n
UTF16Point (Word16 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word16
u0))
in (# UTF16Branch 'LittleEndian -> Res e (UTF16Branch 'LittleEndian)
forall a e. a -> Res e a
Yes UTF16Branch 'LittleEndian
forall {byteOrder :: ByteOrder}. UTF16Branch byteOrder
r #)
{-# INLINE contUtf16LE_2 #-}
contUtf16LE_2
:: e
-> e
-> UTF16Unit 'LittleEndian 2
-> Parser e Char
contUtf16LE_2 :: forall e. e -> e -> UTF16Unit 'LittleEndian 2 -> Parser e Char
contUtf16LE_2 e
u16err e
end (UTF16Unit Word16
u0) = Int -> (ByteString -> (# Res e Char #)) -> e -> Parser e Char
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 (e -> Word16 -> ByteString -> (# Res e Char #)
forall e. e -> Word16 -> ByteString -> (# Res e Char #)
convUTF16LE_2 e
u16err Word16
u0) e
end
{-# INLINE skipUtf16LE_2 #-}
skipUtf16LE_2
:: e
-> e
-> UTF16Unit 'LittleEndian 2
-> Parser e ()
skipUtf16LE_2 :: forall e. e -> e -> UTF16Unit 'LittleEndian 2 -> Parser e ()
skipUtf16LE_2 e
u16err e
end (UTF16Unit Word16
u0) = Int -> (ByteString -> (# Res e () #)) -> e -> Parser e ()
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 (e -> Word16 -> ByteString -> (# Res e () #)
forall e. e -> Word16 -> ByteString -> (# Res e () #)
convUTF16LE_2_ e
u16err Word16
u0) e
end
convUTF16LE_2 :: e -> Word16 -> ByteString -> (# Res e Char #)
convUTF16LE_2 :: forall e. e -> Word16 -> ByteString -> (# Res e Char #)
convUTF16LE_2 e
e Word16
u = (Word16 -> Word16 -> (# Res e Char #))
-> e -> Word16 -> ByteString -> (# Res e Char #)
forall e a.
(Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16LE_2__ Word16 -> Word16 -> (# Res e Char #)
forall e. Word16 -> Word16 -> (# Res e Char #)
castUTF16 e
e Word16
u
convUTF16LE_2_ :: e -> Word16 -> ByteString -> (# Res e () #)
convUTF16LE_2_ :: forall e. e -> Word16 -> ByteString -> (# Res e () #)
convUTF16LE_2_ e
e Word16
u = (Word16 -> Word16 -> (# Res e () #))
-> e -> Word16 -> ByteString -> (# Res e () #)
forall e a.
(Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16LE_2__ (\Word16
_ Word16
_ -> (# () -> Res e ()
forall a e. a -> Res e a
Yes () #)) e
e Word16
u
{-# INLINE convUTF16LE_2__ #-}
convUTF16LE_2__
:: (Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16LE_2__ :: forall e a.
(Word16 -> Word16 -> (# Res e a #))
-> e -> Word16 -> ByteString -> (# Res e a #)
convUTF16LE_2__ Word16 -> Word16 -> (# Res e a #)
f e
e Word16
u0 = \ByteString
b ->
let u1 :: Word16
u1 = ByteString -> Word16
w16LE ByteString
b
in if (Word16
u1 Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
0xFC00) Word16 -> Word16 -> Bool
forall a. Eq a => a -> a -> Bool
== Word16
0xDC00
then Word16 -> Word16 -> (# Res e a #)
f Word16
u0 Word16
u1
else (# e -> Res e a
forall e a. e -> Res e a
No e
e #)