{-# LANGUAGE BangPatterns
, DataKinds
, KindSignatures
, UnboxedTuples #-}
module Parser.Lathe.Encoding.UTF32
(
utf32BOM
, UTF32Point (..)
, fromUtf32
, isSurrogate
, unitUtf32BE
, unitUtf32LE
) where
import Parser.Lathe.Binary.Internal
import Parser.Lathe.Internal
import Data.Bits
import Data.ByteString (ByteString)
import qualified Data.ByteString.Unsafe as B
import Data.Word
import GHC.Base (unsafeChr)
import GHC.ByteOrder
{-# INLINE utf32BOM #-}
utf32BOM
:: e
-> e
-> Parser e ByteOrder
utf32BOM :: forall e. e -> e -> Parser e ByteOrder
utf32BOM e
malformed = Int
-> (ByteString -> (# Res e ByteOrder #)) -> e -> Parser e ByteOrder
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
4 (e -> ByteString -> (# Res e ByteOrder #)
forall e. e -> ByteString -> (# Res e ByteOrder #)
convUtf32BOM e
malformed)
convUtf32BOM :: e -> ByteString -> (# Res e ByteOrder #)
convUtf32BOM :: forall e. e -> ByteString -> (# Res e ByteOrder #)
convUtf32BOM e
e = \ByteString
b ->
let w0 :: Word8
w0 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
0
w1 :: Word8
w1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
1
w2 :: Word8
w2 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
2
w3 :: Word8
w3 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
3
in Word8 -> Word8 -> Word8 -> Word8 -> (# Res e ByteOrder #)
forall {a} {a} {a} {a}.
(Eq a, Eq a, Eq a, Eq a, Num a, Num a, Num a, Num a) =>
a -> a -> a -> a -> (# Res e ByteOrder #)
go Word8
w0 Word8
w1 Word8
w2 Word8
w3
where
go :: a -> a -> a -> a -> (# Res e ByteOrder #)
go a
w0 a
w1 a
w2 a
w3
| a
w0 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0x00, a
w1 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0x00, a
w2 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFE, a
w3 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFF = (# ByteOrder -> Res e ByteOrder
forall a e. a -> Res e a
Yes ByteOrder
BigEndian #)
| a
w0 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFF, a
w1 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0xFE, a
w2 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0x00, a
w3 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0x00 = (# ByteOrder -> Res e ByteOrder
forall a e. a -> Res e a
Yes ByteOrder
LittleEndian #)
| Bool
otherwise = (# e -> Res e ByteOrder
forall e a. e -> Res e a
No e
e #)
newtype UTF32Point = UTF32Point Word32
fromUtf32 :: UTF32Point -> Char
fromUtf32 :: UTF32Point -> Char
fromUtf32 (UTF32Point Word32
u) = Int -> Char
unsafeChr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
u
isSurrogate :: UTF32Point -> Bool
isSurrogate :: UTF32Point -> Bool
isSurrogate (UTF32Point Word32
u) = (Word32
u Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0xFFFFF800) Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0x0000D800
{-# INLINE unitUtf32BE #-}
unitUtf32BE
:: e
-> e
-> Parser e UTF32Point
unitUtf32BE :: forall e. e -> e -> Parser e UTF32Point
unitUtf32BE e
e = Int
-> (ByteString -> (# Res e UTF32Point #))
-> e
-> Parser e UTF32Point
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
4 (e -> ByteString -> (# Res e UTF32Point #)
forall e. e -> ByteString -> (# Res e UTF32Point #)
u32BE e
e)
u32BE :: e -> ByteString -> (# Res e UTF32Point #)
u32BE :: forall e. e -> ByteString -> (# Res e UTF32Point #)
u32BE e
e = \ByteString
b ->
let w :: Word32
w = ByteString -> Word32
w32BE ByteString
b
in if Word32
w Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word32
0x00110000
then (# e -> Res e UTF32Point
forall e a. e -> Res e a
No e
e #)
else (# UTF32Point -> Res e UTF32Point
forall a e. a -> Res e a
Yes (Word32 -> UTF32Point
UTF32Point Word32
w) #)
{-# INLINE unitUtf32LE #-}
unitUtf32LE
:: e
-> e
-> Parser e UTF32Point
unitUtf32LE :: forall e. e -> e -> Parser e UTF32Point
unitUtf32LE e
e = Int
-> (ByteString -> (# Res e UTF32Point #))
-> e
-> Parser e UTF32Point
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
4 (e -> ByteString -> (# Res e UTF32Point #)
forall e. e -> ByteString -> (# Res e UTF32Point #)
u32LE e
e)
u32LE :: e -> ByteString -> (# Res e UTF32Point #)
u32LE :: forall e. e -> ByteString -> (# Res e UTF32Point #)
u32LE e
e = \ByteString
b ->
let w :: Word32
w = ByteString -> Word32
w32LE ByteString
b
in if Word32
w Word32 -> Word32 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word32
0x00110000
then (# e -> Res e UTF32Point
forall e a. e -> Res e a
No e
e #)
else (# UTF32Point -> Res e UTF32Point
forall a e. a -> Res e a
Yes (Word32 -> UTF32Point
UTF32Point Word32
w) #)