{-# LANGUAGE BangPatterns
           , DataKinds
           , KindSignatures
           , UnboxedTuples #-}

{- | Functions for parsing UTF-8.

     Parsing of UTF-8 code points is broken down into two steps
     to allow for full or partial validation of characters as opposed to full
     'Char' conversions.

     The following is an example of parsing a UTF-8 code point into a character:

 @
 data Error = EoF | Malformed | Surrogate | …

 charUtf8 :: t'Parser' Error Char
 charUtf8 = do
   u \<- 'unitUtf8' (\\_ -> Malformed) EoF
   case u of
     'UTF8_1' u1 -> pure $! 'fromUtf8' u1
     'UTF8_2' u2 -> 'contUtf8_2' Malformed EoF u2
     'UTF8_3' u3 -> do
       p \<- 'contUtf8_3' (\\_ -> Malformed) EoF u3
       if 'isSurrogate' p
         then err Surrogate
         else pure $! fromUtf8 p

     'UTF8_4' u4 -> 'contUtf8_4' (\\_ -> Malformed) EoF u4
 @
 -}

module Parser.Lathe.Encoding.UTF8
  ( -- * Byte-order mark
    utf8BOM

    -- * UTF-8
  , UTF8Unit (..)
  , UTF8Point (..)
  , fromUtf8
  , isSurrogate

    -- ** First code unit
  , UTF8Branch (..)
  , UTF8Error_1 (..)
  , unitUtf8

    -- ** Second code unit
  , contUtf8_2
  , skipUtf8_2

    -- ** Third code unit
  , UTF8Error_3 (..)
  , contUtf8_3
  , skipUtf8_3

    -- ** Fourth code unit
  , UTF8Error_4 (..)
  , contUtf8_4
  , skipUtf8_4
  ) where

import           Parser.Lathe.Internal

import           Data.Bits
import           Data.ByteString (ByteString)
import qualified Data.ByteString.Unsafe as B
import           Data.Word
import           GHC.Base (unsafeChr)
import           GHC.TypeNats (Nat)



{-# INLINE utf8BOM #-}
-- | Consume 3 bytes that represent a UTF-8 byte-order mark.
utf8BOM
  :: e           -- ^ Malformed.
  -> e           -- ^ Reached end.
  -> Parser e ()
utf8BOM :: forall e. e -> e -> Parser e ()
utf8BOM e
malformed = Int -> (ByteString -> (# Res e () #)) -> e -> Parser e ()
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
3 (e -> ByteString -> (# Res e () #)
forall e. e -> ByteString -> (# Res e () #)
convUTF8BOM e
malformed)

convUTF8BOM :: e -> ByteString -> (# Res e () #)
convUTF8BOM :: forall e. e -> ByteString -> (# Res e () #)
convUTF8BOM e
e = \ByteString
b ->
  let w0 :: Word8
w0 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
0
      w1 :: Word8
w1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
1
      w2 :: Word8
w2 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
2

  in if Word8
w0 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xEF Bool -> Bool -> Bool
&& Word8
w1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xBB Bool -> Bool -> Bool
&& Word8
w2 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xBF
       then (# () -> Res e ()
forall a e. a -> Res e a
Yes () #)
       else (# e -> Res e ()
forall e a. e -> Res e a
No e
e #)



-- | Errors that may be encountered when parsing the first UTF-8 code unit.
data UTF8Error_1 = -- | (byte 1 is @10xxxxxx@) Continuation code unit.
                   UTF8Continuation

                   -- | (byte 1 is @110__0000__x@) Overlong 2-unit code point.
                 | UTF8Overlong_2

                   -- | (byte 1 is @11110__1__xx@, @xx@ is not @00@)
                   --   Invalid 4-unit code point.
                 | UTF8Overflow_1

                   -- | (byte 1 is @11111xxx@)
                   --   Invalid first code unit.
                 | UTF8Invalid
                   deriving (Int -> UTF8Error_1 -> ShowS
[UTF8Error_1] -> ShowS
UTF8Error_1 -> String
(Int -> UTF8Error_1 -> ShowS)
-> (UTF8Error_1 -> String)
-> ([UTF8Error_1] -> ShowS)
-> Show UTF8Error_1
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UTF8Error_1 -> ShowS
showsPrec :: Int -> UTF8Error_1 -> ShowS
$cshow :: UTF8Error_1 -> String
show :: UTF8Error_1 -> String
$cshowList :: [UTF8Error_1] -> ShowS
showList :: [UTF8Error_1] -> ShowS
Show, UTF8Error_1 -> UTF8Error_1 -> Bool
(UTF8Error_1 -> UTF8Error_1 -> Bool)
-> (UTF8Error_1 -> UTF8Error_1 -> Bool) -> Eq UTF8Error_1
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UTF8Error_1 -> UTF8Error_1 -> Bool
== :: UTF8Error_1 -> UTF8Error_1 -> Bool
$c/= :: UTF8Error_1 -> UTF8Error_1 -> Bool
/= :: UTF8Error_1 -> UTF8Error_1 -> Bool
Eq)
 

-- | Errors that may be encountered when parsing the third UTF-8 code unit.
data UTF8Error_3 = -- | (byte 2 is not @10xxxxxx@) Non-continuation second code unit.
                   UTF8Incomplete_2_3

                   -- | (byte 1 is @1110__0000__@, byte 2 is @10__0__xxxxx@)
                   --   Overlong 3-unit code point.
                 | UTF8Overlong_3

                   -- | (byte 3 is not @10xxxxxx@) Non-continuation third code unit.
                 | UTF8Incomplete_3_3
                   deriving (Int -> UTF8Error_3 -> ShowS
[UTF8Error_3] -> ShowS
UTF8Error_3 -> String
(Int -> UTF8Error_3 -> ShowS)
-> (UTF8Error_3 -> String)
-> ([UTF8Error_3] -> ShowS)
-> Show UTF8Error_3
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UTF8Error_3 -> ShowS
showsPrec :: Int -> UTF8Error_3 -> ShowS
$cshow :: UTF8Error_3 -> String
show :: UTF8Error_3 -> String
$cshowList :: [UTF8Error_3] -> ShowS
showList :: [UTF8Error_3] -> ShowS
Show, UTF8Error_3 -> UTF8Error_3 -> Bool
(UTF8Error_3 -> UTF8Error_3 -> Bool)
-> (UTF8Error_3 -> UTF8Error_3 -> Bool) -> Eq UTF8Error_3
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UTF8Error_3 -> UTF8Error_3 -> Bool
== :: UTF8Error_3 -> UTF8Error_3 -> Bool
$c/= :: UTF8Error_3 -> UTF8Error_3 -> Bool
/= :: UTF8Error_3 -> UTF8Error_3 -> Bool
Eq)


-- | Errors that may be encountered when parsing the fourth UTF-8 code unit.
data UTF8Error_4 = -- | (byte 2 is not @10xxxxxx@) Non-continuation second code unit.
                   UTF8Incomplete_2_4

                 | -- | (byte 1 is @11110__000__@, byte 2 is @10__00__xxxx@)
                   --   Overlong 4-unit code point.
                   UTF8Overlong_4

                   -- | (byte 1 is @11110__100__@, byte 2 is not @10__00__xxxx@)
                   --   Invalid 4-unit code point.
                 | UTF8Overflow_2

                   -- | (byte 3 is not @10xxxxxx@) Non-continuation third code unit.
                 | UTF8Incomplete_3_4

                   -- | (byte 4 is not @10xxxxxx@) Non-continuation fourth code unit.
                 | UTF8Incomplete_4_4
                   deriving (Int -> UTF8Error_4 -> ShowS
[UTF8Error_4] -> ShowS
UTF8Error_4 -> String
(Int -> UTF8Error_4 -> ShowS)
-> (UTF8Error_4 -> String)
-> ([UTF8Error_4] -> ShowS)
-> Show UTF8Error_4
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> UTF8Error_4 -> ShowS
showsPrec :: Int -> UTF8Error_4 -> ShowS
$cshow :: UTF8Error_4 -> String
show :: UTF8Error_4 -> String
$cshowList :: [UTF8Error_4] -> ShowS
showList :: [UTF8Error_4] -> ShowS
Show, UTF8Error_4 -> UTF8Error_4 -> Bool
(UTF8Error_4 -> UTF8Error_4 -> Bool)
-> (UTF8Error_4 -> UTF8Error_4 -> Bool) -> Eq UTF8Error_4
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: UTF8Error_4 -> UTF8Error_4 -> Bool
== :: UTF8Error_4 -> UTF8Error_4 -> Bool
$c/= :: UTF8Error_4 -> UTF8Error_4 -> Bool
/= :: UTF8Error_4 -> UTF8Error_4 -> Bool
Eq)



-- | First UTF-8 code unit.
--   @n@ represents the total number of code units in this code point.
newtype UTF8Unit (n :: Nat) = UTF8Unit Word8

-- | A Unicode code point.
newtype UTF8Point (n :: Nat) = UTF8Point Word32

-- | Convert a code point into a 'Char'.
fromUtf8 :: UTF8Point n -> Char
fromUtf8 :: forall (n :: Nat). UTF8Point n -> Char
fromUtf8 (UTF8Point Word32
w0) = Int -> Char
unsafeChr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
w0

-- | Check whether a 3-unit code point lies in the surrogate range (@U+D800@ to @U+DFFF@).
isSurrogate :: UTF8Point 3 -> Bool
isSurrogate :: UTF8Point 3 -> Bool
isSurrogate (UTF8Point Word32
w) = (Word32
w Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0xFFFFF800) Word32 -> Word32 -> Bool
forall a. Eq a => a -> a -> Bool
== Word32
0x0000D800



-- | UTF-8 branching based on the first code unit.
data UTF8Branch = UTF8_1 {-# UNPACK #-} !(UTF8Point 1)
                | UTF8_2 {-# UNPACK #-} !(UTF8Unit 2)
                | UTF8_3 {-# UNPACK #-} !(UTF8Unit 3)
                | UTF8_4 {-# UNPACK #-} !(UTF8Unit 4)

{-# INLINE unitUtf8 #-}
-- | Consume 1 byte that represents the first code unit of a UTF-8 code point.
unitUtf8
  :: (UTF8Error_1 -> e)  -- ^ Malformed
  -> e                   -- ^ Reached end.
  -> Parser e UTF8Branch
unitUtf8 :: forall e. (UTF8Error_1 -> e) -> e -> Parser e UTF8Branch
unitUtf8 UTF8Error_1 -> e
malformed e
end = do
  Word8
w0 <- e -> Parser e Word8
forall end. end -> Parser end Word8
word8 e
end
  Word8 -> Parser e UTF8Branch
go Word8
w0
  where
    go :: Word8 -> Parser e UTF8Branch
go Word8
w0
      | (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x80) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 = UTF8Branch -> Parser e UTF8Branch
forall a. a -> Parser e a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (UTF8Branch -> Parser e UTF8Branch)
-> UTF8Branch -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$! UTF8Point 1 -> UTF8Branch
UTF8_1 (Word32 -> UTF8Point 1
forall (n :: Nat). Word32 -> UTF8Point n
UTF8Point (Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w0))
      | (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x40) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 = e -> Parser e UTF8Branch
forall e a. e -> Parser e a
err (e -> Parser e UTF8Branch) -> e -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$ UTF8Error_1 -> e
malformed UTF8Error_1
UTF8Continuation
      | (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x20) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 = if (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x1F) Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
0x02
                               then e -> Parser e UTF8Branch
forall e a. e -> Parser e a
err (e -> Parser e UTF8Branch) -> e -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$ UTF8Error_1 -> e
malformed UTF8Error_1
UTF8Overlong_2
                               else UTF8Branch -> Parser e UTF8Branch
forall a. a -> Parser e a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (UTF8Branch -> Parser e UTF8Branch)
-> UTF8Branch -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$! UTF8Unit 2 -> UTF8Branch
UTF8_2 (Word8 -> UTF8Unit 2
forall (n :: Nat). Word8 -> UTF8Unit n
UTF8Unit Word8
w0)

      | (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x10) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 = UTF8Branch -> Parser e UTF8Branch
forall a. a -> Parser e a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (UTF8Branch -> Parser e UTF8Branch)
-> UTF8Branch -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$! UTF8Unit 3 -> UTF8Branch
UTF8_3 (Word8 -> UTF8Unit 3
forall (n :: Nat). Word8 -> UTF8Unit n
UTF8Unit Word8
w0)
      | (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x08) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 = if (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x07) Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Word8
0x04
                               then e -> Parser e UTF8Branch
forall e a. e -> Parser e a
err (e -> Parser e UTF8Branch) -> e -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$ UTF8Error_1 -> e
malformed UTF8Error_1
UTF8Overflow_1
                               else UTF8Branch -> Parser e UTF8Branch
forall a. a -> Parser e a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (UTF8Branch -> Parser e UTF8Branch)
-> UTF8Branch -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$! UTF8Unit 4 -> UTF8Branch
UTF8_4 (Word8 -> UTF8Unit 4
forall (n :: Nat). Word8 -> UTF8Unit n
UTF8Unit Word8
w0)

      | Bool
otherwise          = e -> Parser e UTF8Branch
forall e a. e -> Parser e a
err (e -> Parser e UTF8Branch) -> e -> Parser e UTF8Branch
forall a b. (a -> b) -> a -> b
$ UTF8Error_1 -> e
malformed UTF8Error_1
UTF8Invalid



checkIncomplete :: Word8 -> Bool
checkIncomplete :: Word8 -> Bool
checkIncomplete Word8
w1 = (Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0xC0) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x80

{-# INLINE contUtf8_2 #-}
-- | Consume 1 byte that represents the second code unit of a 2-unit UTF-8 code point
--   and convert the two units into a 'Char'.
contUtf8_2
  :: e             -- ^ (byte 2 is not @10xxxxxx@) Non-continuation second code unit.
  -> e             -- ^ Reached end.
  -> UTF8Unit 2
  -> Parser e Char
contUtf8_2 :: forall e. e -> e -> UTF8Unit 2 -> Parser e Char
contUtf8_2 e
incomplete e
end (UTF8Unit Word8
w0) = do
  Word8
w1 <- e -> Parser e Word8
forall end. end -> Parser end Word8
word8 e
end
  if Word8 -> Bool
checkIncomplete Word8
w1
    then e -> Parser e Char
forall e a. e -> Parser e a
err e
incomplete
    else Char -> Parser e Char
forall a. a -> Parser e a
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Char -> Parser e Char) -> Char -> Parser e Char
forall a b. (a -> b) -> a -> b
$! Word8 -> Word8 -> Char
charUtf8_2 Word8
w0 Word8
w1

{-# INLINE skipUtf8_2 #-}
-- | Consume 1 byte that represents the second code unit of a 2-unit UTF-8 code point.
skipUtf8_2
  :: e           -- ^ (byte 2 is not @10xxxxxx@) Non-continuation second code unit.
  -> e           -- ^ Reached end.
  -> UTF8Unit 2
  -> Parser e ()
skipUtf8_2 :: forall e. e -> e -> UTF8Unit 2 -> Parser e ()
skipUtf8_2 e
incomplete e
end (UTF8Unit Word8
_) = do
  Word8
w1 <- e -> Parser e Word8
forall end. end -> Parser end Word8
word8 e
end
  if Word8 -> Bool
checkIncomplete Word8
w1
    then e -> Parser e ()
forall e a. e -> Parser e a
err e
incomplete
    else () -> Parser e ()
forall a. a -> Parser e a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

charUtf8_2 :: Word8 -> Word8 -> Char
charUtf8_2 :: Word8 -> Word8 -> Char
charUtf8_2 Word8
w0 Word8
w1 =
  Int -> Char
unsafeChr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x1F) Int
6
            Int -> Int -> Int
forall a. Num a => a -> a -> a
+              (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)



{-# INLINE contUtf8_3 #-}
-- | Consume 2 bytes that represent the second and third code units of
--   a 3-unit UTF-8 code point and convert the three units into a code point.
contUtf8_3
  :: (UTF8Error_3 -> e)     -- ^ Malformed.
  -> e                      -- ^ Reached end.
  -> UTF8Unit 3
  -> Parser e (UTF8Point 3)
contUtf8_3 :: forall e.
(UTF8Error_3 -> e) -> e -> UTF8Unit 3 -> Parser e (UTF8Point 3)
contUtf8_3 UTF8Error_3 -> e
e e
end (UTF8Unit Word8
w0) = Int
-> (ByteString -> (# Res e (UTF8Point 3) #))
-> e
-> Parser e (UTF8Point 3)
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 ((UTF8Error_3 -> e)
-> Word8 -> ByteString -> (# Res e (UTF8Point 3) #)
forall e.
(UTF8Error_3 -> e)
-> Word8 -> ByteString -> (# Res e (UTF8Point 3) #)
convUTF8_3 UTF8Error_3 -> e
e Word8
w0) e
end

{-# INLINE skipUtf8_3 #-}
-- | Consume 2 bytes that represent the second and third code units of
--   a 3-unit UTF-8 code point.
skipUtf8_3
  :: (UTF8Error_3 -> e) -- ^ Malformed.
  -> e                  -- ^ Reached end.
  -> UTF8Unit 3
  -> Parser e Bool      -- ^ 'True' if the skipped code point lies in
                        -- the surrogate range (@U+D800@ to @U+DFFF@).
skipUtf8_3 :: forall e. (UTF8Error_3 -> e) -> e -> UTF8Unit 3 -> Parser e Bool
skipUtf8_3 UTF8Error_3 -> e
e e
end (UTF8Unit Word8
w0) = Int -> (ByteString -> (# Res e Bool #)) -> e -> Parser e Bool
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
2 ((UTF8Error_3 -> e) -> Word8 -> ByteString -> (# Res e Bool #)
forall e.
(UTF8Error_3 -> e) -> Word8 -> ByteString -> (# Res e Bool #)
convUTF8_3_ UTF8Error_3 -> e
e Word8
w0) e
end



checkOverlong3 :: Word8 -> Word8 -> Bool
checkOverlong3 :: Word8 -> Word8 -> Bool
checkOverlong3 Word8
w0 Word8
w1 = (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x0F) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0 Bool -> Bool -> Bool
&& (Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x20) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0

checkSurrogate :: Word8 -> Word8 -> Bool
checkSurrogate :: Word8 -> Word8 -> Bool
checkSurrogate Word8
w0 Word8
w1 = (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x0F) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x0D Bool -> Bool -> Bool
&& (Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x20) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0

convUTF8_3
  :: (UTF8Error_3 -> e)
  -> Word8 -> ByteString -> (# Res e (UTF8Point 3) #)
convUTF8_3 :: forall e.
(UTF8Error_3 -> e)
-> Word8 -> ByteString -> (# Res e (UTF8Point 3) #)
convUTF8_3 UTF8Error_3 -> e
malformed Word8
w0 = \ByteString
b ->
  let w1 :: Word8
w1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
0
      w2 :: Word8
w2 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
1

  in if Word8 -> Bool
checkIncomplete Word8
w1
       then (# e -> Res e (UTF8Point 3)
forall e a. e -> Res e a
No (UTF8Error_3 -> e
malformed UTF8Error_3
UTF8Incomplete_2_3) #)
       else
         if Word8 -> Word8 -> Bool
checkOverlong3 Word8
w0 Word8
w1
           then (# e -> Res e (UTF8Point 3)
forall e a. e -> Res e a
No (UTF8Error_3 -> e
malformed UTF8Error_3
UTF8Overlong_3) #)
           else
             if Word8 -> Bool
checkIncomplete Word8
w2
               then (# e -> Res e (UTF8Point 3)
forall e a. e -> Res e a
No (UTF8Error_3 -> e
malformed UTF8Error_3
UTF8Incomplete_3_3) #)
               else
                 let !r :: Word32
r = Word8 -> Word8 -> Word8 -> Word32
charUtf8_3 Word8
w0 Word8
w1 Word8
w2
                 in (# UTF8Point 3 -> Res e (UTF8Point 3)
forall a e. a -> Res e a
Yes (Word32 -> UTF8Point 3
forall (n :: Nat). Word32 -> UTF8Point n
UTF8Point Word32
r) #)

convUTF8_3_
  :: (UTF8Error_3 -> e)
  -> Word8 -> ByteString -> (# Res e Bool #)
convUTF8_3_ :: forall e.
(UTF8Error_3 -> e) -> Word8 -> ByteString -> (# Res e Bool #)
convUTF8_3_ UTF8Error_3 -> e
malformed Word8
w0 = \ByteString
b ->
  let w1 :: Word8
w1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
0
      w2 :: Word8
w2 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
1

  in if Word8 -> Bool
checkIncomplete Word8
w1
       then (# e -> Res e Bool
forall e a. e -> Res e a
No (UTF8Error_3 -> e
malformed UTF8Error_3
UTF8Incomplete_2_3) #)
       else
         if Word8 -> Word8 -> Bool
checkOverlong3 Word8
w0 Word8
w1
           then (# e -> Res e Bool
forall e a. e -> Res e a
No (UTF8Error_3 -> e
malformed UTF8Error_3
UTF8Overlong_3) #)
           else
             if Word8 -> Bool
checkIncomplete Word8
w2
               then (# e -> Res e Bool
forall e a. e -> Res e a
No (UTF8Error_3 -> e
malformed UTF8Error_3
UTF8Incomplete_3_3) #)
               else
                 let !r :: Bool
r = Word8 -> Word8 -> Bool
checkSurrogate Word8
w0 Word8
w1
                 in (# Bool -> Res e Bool
forall a e. a -> Res e a
Yes Bool
r #)

charUtf8_3 :: Word8 -> Word8 -> Word8 -> Word32
charUtf8_3 :: Word8 -> Word8 -> Word8 -> Word32
charUtf8_3 Word8
w0 Word8
w1 Word8
w2 =
    Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w0 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x0F) Int
12
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w1 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3F) Int
6
  Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+              (Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w2 Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
0x3F)



{-# INLINE contUtf8_4 #-}
-- | Consume 3 bytes that represent the second to fourth code units of
--   a 4-unit UTF-8 code point and convert the four units into a 'Char'.
contUtf8_4
  :: (UTF8Error_4 -> e) -- ^ Malformed.
  -> e                  -- ^ Reached end.
  -> UTF8Unit 4
  -> Parser e Char
contUtf8_4 :: forall e. (UTF8Error_4 -> e) -> e -> UTF8Unit 4 -> Parser e Char
contUtf8_4 UTF8Error_4 -> e
e e
end (UTF8Unit Word8
w0) = Int -> (ByteString -> (# Res e Char #)) -> e -> Parser e Char
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
3 ((UTF8Error_4 -> e) -> Word8 -> ByteString -> (# Res e Char #)
forall e.
(UTF8Error_4 -> e) -> Word8 -> ByteString -> (# Res e Char #)
convUTF8_4 UTF8Error_4 -> e
e Word8
w0) e
end

{-# INLINE skipUtf8_4 #-}
-- | Consume 3 bytes that represent the second to fourth code units of
--   a 4-unit UTF-8 code point.
skipUtf8_4
  :: (UTF8Error_4 -> e) -- ^ Malformed.
  -> e                  -- ^ Reached end.
  -> UTF8Unit 4
  -> Parser e ()
skipUtf8_4 :: forall e. (UTF8Error_4 -> e) -> e -> UTF8Unit 4 -> Parser e ()
skipUtf8_4 UTF8Error_4 -> e
e e
end (UTF8Unit Word8
w0) = Int -> (ByteString -> (# Res e () #)) -> e -> Parser e ()
forall e a. Int -> (ByteString -> (# Res e a #)) -> e -> Parser e a
unsafeRead Int
3 ((UTF8Error_4 -> e) -> Word8 -> ByteString -> (# Res e () #)
forall e.
(UTF8Error_4 -> e) -> Word8 -> ByteString -> (# Res e () #)
convUTF8_4_ UTF8Error_4 -> e
e Word8
w0) e
end



checkOverlong4 :: Word8 -> Word8 -> Bool
checkOverlong4 :: Word8 -> Word8 -> Bool
checkOverlong4 Word8
w0 Word8
w1 = (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x07) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x00 Bool -> Bool -> Bool
&& (Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x30) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x00

checkOverflow4 :: Word8 -> Word8 -> Bool
checkOverflow4 :: Word8 -> Word8 -> Bool
checkOverflow4 Word8
w0 Word8
w1 = (Word8
w0 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x07) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x04 Bool -> Bool -> Bool
&& (Word8
w1 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x30) Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x00

convUTF8_4
  :: (UTF8Error_4 -> e)
  -> Word8 -> ByteString -> (# Res e Char #)
convUTF8_4 :: forall e.
(UTF8Error_4 -> e) -> Word8 -> ByteString -> (# Res e Char #)
convUTF8_4 UTF8Error_4 -> e
malformed Word8
w0 = \ByteString
b ->
  let w1 :: Word8
w1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
0
      w2 :: Word8
w2 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
1
      w3 :: Word8
w3 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
2

  in if Word8 -> Bool
checkIncomplete Word8
w1
       then (# e -> Res e Char
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Incomplete_2_4) #)
       else
         if Word8 -> Word8 -> Bool
checkOverlong4 Word8
w0 Word8
w1
           then (# e -> Res e Char
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Overlong_4) #)
           else
             if Word8 -> Word8 -> Bool
checkOverflow4 Word8
w0 Word8
w1
               then (# e -> Res e Char
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Overflow_2) #)
               else
                 if Word8 -> Bool
checkIncomplete Word8
w2
                   then (# e -> Res e Char
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Incomplete_3_4) #)
                   else
                     if Word8 -> Bool
checkIncomplete Word8
w3
                       then (# e -> Res e Char
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Incomplete_4_4) #)
                       else
                         let !r :: Char
r = Word8 -> Word8 -> Word8 -> Word8 -> Char
charUtf8_4 Word8
w0 Word8
w1 Word8
w2 Word8
w3
                         in (# Char -> Res e Char
forall a e. a -> Res e a
Yes Char
r #)

convUTF8_4_
  :: (UTF8Error_4 -> e)
  -> Word8 -> ByteString -> (# Res e () #)
convUTF8_4_ :: forall e.
(UTF8Error_4 -> e) -> Word8 -> ByteString -> (# Res e () #)
convUTF8_4_ UTF8Error_4 -> e
malformed Word8
w0 = \ByteString
b ->
  let w1 :: Word8
w1 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
0
      w2 :: Word8
w2 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
1
      w3 :: Word8
w3 = ByteString -> Int -> Word8
B.unsafeIndex ByteString
b Int
2

  in if Word8 -> Bool
checkIncomplete Word8
w1
       then (# e -> Res e ()
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Incomplete_2_4) #)
       else
         if Word8 -> Word8 -> Bool
checkOverlong4 Word8
w0 Word8
w1
           then (# e -> Res e ()
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Overlong_4) #)
           else
             if Word8 -> Word8 -> Bool
checkOverflow4 Word8
w0 Word8
w1
               then (# e -> Res e ()
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Overflow_2) #)
               else
                 if Word8 -> Bool
checkIncomplete Word8
w2
                   then (# e -> Res e ()
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Incomplete_3_4) #)
                   else
                     if Word8 -> Bool
checkIncomplete Word8
w3
                       then (# e -> Res e ()
forall e a. e -> Res e a
No (UTF8Error_4 -> e
malformed UTF8Error_4
UTF8Incomplete_4_4) #)
                       else (# () -> Res e ()
forall a e. a -> Res e a
Yes () #)

charUtf8_4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
charUtf8_4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
charUtf8_4 Word8
w0 Word8
w1 Word8
w2 Word8
w3 =
  Int -> Char
unsafeChr (Int -> Char) -> Int -> Char
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w0 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x07) Int
18
            Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w1 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
12
            Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int -> Int -> Int
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w2 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int
6
            Int -> Int -> Int
forall a. Num a => a -> a -> a
+              (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w3 Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)