{-# LANGUAGE CPP, MagicHash, BangPatterns #-}

-- |
-- Module      : Data.Text.Internal.Encoding.Utf8
-- Copyright   : (c) 2008, 2009 Tom Harper,
--               (c) 2009, 2010 Bryan O'Sullivan,
--               (c) 2009 Duncan Coutts
--               (c) 2021 Andrew Lelechenko
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Stability   : experimental
-- Portability : GHC
--
-- /Warning/: this is an internal module, and does not have a stable
-- API or name. Functions in this module may not check or enforce
-- preconditions expected by public modules. Use at your own risk!
--
-- Basic UTF-8 validation and character manipulation.
module Data.Text.Internal.Encoding.Utf8
    ( utf8Length
    , utf8LengthByLeader
    -- Decomposition
    , ord2
    , ord3
    , ord4
    -- Construction
    , chr2
    , chr3
    , chr4
    -- * Validation
    , validate1
    , validate2
    , validate3
    , validate4
    -- * Naive decoding
    , DecoderResult(..)
    , DecoderState(..)
    , CodePoint(..)
    , utf8DecodeStart
    , utf8DecodeContinue
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
import GHC.Stack (HasCallStack)
#endif
import Data.Bits (Bits(..), FiniteBits(..))
import Data.Char (ord, chr)
import GHC.Exts
import GHC.Word (Word8(..))

#if !MIN_VERSION_base(4,16,0)
-- harmless to import, except for warnings that it is unused.
import Data.Text.Internal.PrimCompat (word8ToWord#)
#endif

default(Int)

between :: Word8                -- ^ byte to check
        -> Word8                -- ^ lower bound
        -> Word8                -- ^ upper bound
        -> Bool
between :: Word8 -> Word8 -> Word8 -> Bool
between Word8
x Word8
y Word8
z = Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
y Bool -> Bool -> Bool
&& Word8
x Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
z
{-# INLINE between #-}

-- This is a branchless version of
-- utf8Length c
--   | ord c < 0x80    = 1
--   | ord c < 0x800   = 2
--   | ord c < 0x10000 = 3
--   | otherwise       = 4
-- Implementation suggested by Alex Mason.

-- | @since 2.0
utf8Length :: Char -> Int
utf8Length :: Char -> Int
utf8Length (C# Char#
c) = Int# -> Int
I# ((Int#
1# Int# -> Int# -> Int#
+# Char# -> Char# -> Int#
geChar# Char#
c (Int# -> Char#
chr# Int#
0x80#)) Int# -> Int# -> Int#
+# (Char# -> Char# -> Int#
geChar# Char#
c (Int# -> Char#
chr# Int#
0x800#) Int# -> Int# -> Int#
+# Char# -> Char# -> Int#
geChar# Char#
c (Int# -> Char#
chr# Int#
0x10000#)))
{-# INLINE utf8Length #-}

-- This is a branchless version of
-- utf8LengthByLeader w
--   | w < 0x80  = 1
--   | w < 0xE0  = 2
--   | w < 0xF0  = 3
--   | otherwise = 4
--
-- c `xor` I# (c# <=# 0#) is a branchless equivalent of c `max` 1.
-- It is crucial to write c# <=# 0# and not c# ==# 0#, otherwise
-- GHC is tempted to "optimize" by introduction of branches.

-- | @since 2.0
utf8LengthByLeader :: Word8 -> Int
utf8LengthByLeader :: Word8 -> Int
utf8LengthByLeader Word8
w = Int
c Int -> Int -> Int
forall a. Bits a => a -> a -> a
`xor` Int# -> Int
I# (Int#
c# Int# -> Int# -> Int#
<=# Int#
0#)
  where
    !c :: Int
c@(I# Int#
c#) = Word8 -> Int
forall b. FiniteBits b => b -> Int
countLeadingZeros (Word8 -> Word8
forall a. Bits a => a -> a
complement Word8
w)
{-# INLINE utf8LengthByLeader #-}

ord2 ::
#if defined(ASSERTS)
  HasCallStack =>
#endif
  Char -> (Word8,Word8)
ord2 :: Char -> (Word8, Word8)
ord2 Char
c =
#if defined(ASSERTS)
    assert (n >= 0x80 && n <= 0x07ff)
#endif
    (Word8
x1,Word8
x2)
    where
      n :: Int
n  = Char -> Int
ord Char
c
      x1 :: Word8
x1 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xC0
      x2 :: Word8
x2 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F)   Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
{-# INLINE ord2 #-}

ord3 ::
#if defined(ASSERTS)
  HasCallStack =>
#endif
  Char -> (Word8,Word8,Word8)
ord3 :: Char -> (Word8, Word8, Word8)
ord3 Char
c =
#if defined(ASSERTS)
    assert (n >= 0x0800 && n <= 0xffff)
#endif
    (Word8
x1,Word8
x2,Word8
x3)
    where
      n :: Int
n  = Char -> Int
ord Char
c
      x1 :: Word8
x1 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
12) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xE0
      x2 :: Word8
x2 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
      x3 :: Word8
x3 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
{-# INLINE ord3 #-}

ord4 ::
#if defined(ASSERTS)
  HasCallStack =>
#endif
  Char -> (Word8,Word8,Word8,Word8)
ord4 :: Char -> (Word8, Word8, Word8, Word8)
ord4 Char
c =
#if defined(ASSERTS)
    assert (n >= 0x10000)
#endif
    (Word8
x1,Word8
x2,Word8
x3,Word8
x4)
    where
      n :: Int
n  = Char -> Int
ord Char
c
      x1 :: Word8
x1 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
18) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0xF0
      x2 :: Word8
x2 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
12) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
      x3 :: Word8
x3 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ ((Int
n Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
      x4 :: Word8
x4 = Int -> Word8
intToWord8 (Int -> Word8) -> Int -> Word8
forall a b. (a -> b) -> a -> b
$ (Int
n Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. Int
0x3F) Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
0x80
{-# INLINE ord4 #-}

chr2 :: Word8 -> Word8 -> Char
chr2 :: Word8 -> Word8 -> Char
chr2 (W8# Word#
x1#) (W8# Word#
x2#) = Char# -> Char
C# (Int# -> Char#
chr# (Int#
z1# Int# -> Int# -> Int#
+# Int#
z2#))
    where
      !y1# :: Int#
y1# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x1#)
      !y2# :: Int#
y2# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x2#)
      !z1# :: Int#
z1# = Int# -> Int# -> Int#
uncheckedIShiftL# (Int#
y1# Int# -> Int# -> Int#
-# Int#
0xC0#) Int#
6#
      !z2# :: Int#
z2# = Int#
y2# Int# -> Int# -> Int#
-# Int#
0x80#
{-# INLINE chr2 #-}

chr3 :: Word8 -> Word8 -> Word8 -> Char
chr3 :: Word8 -> Word8 -> Word8 -> Char
chr3 (W8# Word#
x1#) (W8# Word#
x2#) (W8# Word#
x3#) = Char# -> Char
C# (Int# -> Char#
chr# (Int#
z1# Int# -> Int# -> Int#
+# Int#
z2# Int# -> Int# -> Int#
+# Int#
z3#))
    where
      !y1# :: Int#
y1# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x1#)
      !y2# :: Int#
y2# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x2#)
      !y3# :: Int#
y3# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x3#)
      !z1# :: Int#
z1# = Int# -> Int# -> Int#
uncheckedIShiftL# (Int#
y1# Int# -> Int# -> Int#
-# Int#
0xE0#) Int#
12#
      !z2# :: Int#
z2# = Int# -> Int# -> Int#
uncheckedIShiftL# (Int#
y2# Int# -> Int# -> Int#
-# Int#
0x80#) Int#
6#
      !z3# :: Int#
z3# = Int#
y3# Int# -> Int# -> Int#
-# Int#
0x80#
{-# INLINE chr3 #-}

chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
chr4 (W8# Word#
x1#) (W8# Word#
x2#) (W8# Word#
x3#) (W8# Word#
x4#) =
    Char# -> Char
C# (Int# -> Char#
chr# (Int#
z1# Int# -> Int# -> Int#
+# Int#
z2# Int# -> Int# -> Int#
+# Int#
z3# Int# -> Int# -> Int#
+# Int#
z4#))
    where
      !y1# :: Int#
y1# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x1#)
      !y2# :: Int#
y2# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x2#)
      !y3# :: Int#
y3# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x3#)
      !y4# :: Int#
y4# = Word# -> Int#
word2Int# (Word# -> Word#
word8ToWord# Word#
x4#)
      !z1# :: Int#
z1# = Int# -> Int# -> Int#
uncheckedIShiftL# (Int#
y1# Int# -> Int# -> Int#
-# Int#
0xF0#) Int#
18#
      !z2# :: Int#
z2# = Int# -> Int# -> Int#
uncheckedIShiftL# (Int#
y2# Int# -> Int# -> Int#
-# Int#
0x80#) Int#
12#
      !z3# :: Int#
z3# = Int# -> Int# -> Int#
uncheckedIShiftL# (Int#
y3# Int# -> Int# -> Int#
-# Int#
0x80#) Int#
6#
      !z4# :: Int#
z4# = Int#
y4# Int# -> Int# -> Int#
-# Int#
0x80#
{-# INLINE chr4 #-}

validate1 :: Word8 -> Bool
validate1 :: Word8 -> Bool
validate1 Word8
x1 = Word8
x1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x7F
{-# INLINE validate1 #-}

validate2 :: Word8 -> Word8 -> Bool
validate2 :: Word8 -> Word8 -> Bool
validate2 Word8
x1 Word8
x2 = Word8 -> Word8 -> Word8 -> Bool
between Word8
x1 Word8
0xC2 Word8
0xDF Bool -> Bool -> Bool
&& Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0x80 Word8
0xBF
{-# INLINE validate2 #-}

validate3 :: Word8 -> Word8 -> Word8 -> Bool
{-# INLINE validate3 #-}
validate3 :: Word8 -> Word8 -> Word8 -> Bool
validate3 Word8
x1 Word8
x2 Word8
x3 = Bool
validate3_1 Bool -> Bool -> Bool
|| Bool
validate3_2 Bool -> Bool -> Bool
|| Bool
validate3_3 Bool -> Bool -> Bool
|| Bool
validate3_4
  where
    validate3_1 :: Bool
validate3_1 = (Word8
x1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xE0) Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0xA0 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x3 Word8
0x80 Word8
0xBF
    validate3_2 :: Bool
validate3_2 = Word8 -> Word8 -> Word8 -> Bool
between Word8
x1 Word8
0xE1 Word8
0xEC Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0x80 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x3 Word8
0x80 Word8
0xBF
    validate3_3 :: Bool
validate3_3 = Word8
x1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xED Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0x80 Word8
0x9F Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x3 Word8
0x80 Word8
0xBF
    validate3_4 :: Bool
validate3_4 = Word8 -> Word8 -> Word8 -> Bool
between Word8
x1 Word8
0xEE Word8
0xEF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0x80 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x3 Word8
0x80 Word8
0xBF

validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
{-# INLINE validate4 #-}
validate4 :: Word8 -> Word8 -> Word8 -> Word8 -> Bool
validate4 Word8
x1 Word8
x2 Word8
x3 Word8
x4 = Bool
validate4_1 Bool -> Bool -> Bool
|| Bool
validate4_2 Bool -> Bool -> Bool
|| Bool
validate4_3
  where
    validate4_1 :: Bool
validate4_1 = Word8
x1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xF0 Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0x90 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x3 Word8
0x80 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x4 Word8
0x80 Word8
0xBF
    validate4_2 :: Bool
validate4_2 = Word8 -> Word8 -> Word8 -> Bool
between Word8
x1 Word8
0xF1 Word8
0xF3 Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0x80 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x3 Word8
0x80 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x4 Word8
0x80 Word8
0xBF
    validate4_3 :: Bool
validate4_3 = Word8
x1 Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0xF4 Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x2 Word8
0x80 Word8
0x8F Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x3 Word8
0x80 Word8
0xBF Bool -> Bool -> Bool
&&
                  Word8 -> Word8 -> Word8 -> Bool
between Word8
x4 Word8
0x80 Word8
0xBF

intToWord8 :: Int -> Word8
intToWord8 :: Int -> Word8
intToWord8 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral

word8ToInt :: Word8 -> Int
word8ToInt :: Word8 -> Int
word8ToInt = Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral

-------------------------------------------------------------------------------
-- Naive UTF8 decoder.
-- See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for the explanation of the state machine.

newtype ByteClass = ByteClass Word8

byteToClass :: Word8 -> ByteClass
byteToClass :: Word8 -> ByteClass
byteToClass Word8
n = Word8 -> ByteClass
ByteClass (Word# -> Word8
W8# Word#
el#)
  where
    !(I# Int#
n#) = Word8 -> Int
word8ToInt Word8
n
    el# :: Word#
el# = Addr# -> Int# -> Word#
indexWord8OffAddr# Addr#
table# Int#
n#

    table# :: Addr#
    table# :: Addr#
table# = Addr#
"\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\NUL\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\SOH\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\a\b\b\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\STX\n\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\ETX\EOT\ETX\ETX\v\ACK\ACK\ACK\ENQ\b\b\b\b\b\b\b\b\b\b\b"#

newtype DecoderState = DecoderState Word8
  deriving (DecoderState -> DecoderState -> Bool
(DecoderState -> DecoderState -> Bool)
-> (DecoderState -> DecoderState -> Bool) -> Eq DecoderState
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DecoderState -> DecoderState -> Bool
$c/= :: DecoderState -> DecoderState -> Bool
== :: DecoderState -> DecoderState -> Bool
$c== :: DecoderState -> DecoderState -> Bool
Eq)

utf8AcceptState :: DecoderState
utf8AcceptState :: DecoderState
utf8AcceptState = Word8 -> DecoderState
DecoderState Word8
0

utf8RejectState :: DecoderState
utf8RejectState :: DecoderState
utf8RejectState = Word8 -> DecoderState
DecoderState Word8
12

updateState :: ByteClass -> DecoderState -> DecoderState
updateState :: ByteClass -> DecoderState -> DecoderState
updateState (ByteClass Word8
c) (DecoderState Word8
s) = Word8 -> DecoderState
DecoderState (Word# -> Word8
W8# Word#
el#)
  where
    !(I# Int#
n#) = Word8 -> Int
word8ToInt (Word8
c Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
s)
    el# :: Word#
el# = Addr# -> Int# -> Word#
indexWord8OffAddr# Addr#
table# Int#
n#

    table# :: Addr#
    table# :: Addr#
table# = Addr#
"\NUL\f\CAN$<`T\f\f\f0H\f\f\f\f\f\f\f\f\f\f\f\f\f\NUL\f\f\f\f\f\NUL\f\NUL\f\f\f\CAN\f\f\f\f\f\CAN\f\CAN\f\f\f\f\f\f\f\f\f\CAN\f\f\f\f\f\CAN\f\f\f\f\f\f\f\CAN\f\f\f\f\f\f\f\f\f$\f$\f\f\f$\f\f\f\f\f$\f$\f\f\f$\f\f\f\f\f\f\f\f\f\f"#

newtype CodePoint = CodePoint Int

-- | @since 2.0
data DecoderResult
  = Accept !Char
  | Incomplete !DecoderState !CodePoint
  | Reject

-- | @since 2.0
utf8DecodeStart :: Word8 -> DecoderResult
utf8DecodeStart :: Word8 -> DecoderResult
utf8DecodeStart !Word8
w
  | DecoderState
st DecoderState -> DecoderState -> Bool
forall a. Eq a => a -> a -> Bool
== DecoderState
utf8AcceptState = Char -> DecoderResult
Accept (Int -> Char
chr (Word8 -> Int
word8ToInt Word8
w))
  | DecoderState
st DecoderState -> DecoderState -> Bool
forall a. Eq a => a -> a -> Bool
== DecoderState
utf8RejectState = DecoderResult
Reject
  | Bool
otherwise             = DecoderState -> CodePoint -> DecoderResult
Incomplete DecoderState
st (Int -> CodePoint
CodePoint Int
cp)
  where
    cl :: ByteClass
cl@(ByteClass Word8
cl') = Word8 -> ByteClass
byteToClass Word8
w
    st :: DecoderState
st = ByteClass -> DecoderState -> DecoderState
updateState ByteClass
cl DecoderState
utf8AcceptState
    cp :: Int
cp = Word8 -> Int
word8ToInt (Word8 -> Int) -> Word8 -> Int
forall a b. (a -> b) -> a -> b
$ (Word8
0xff Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
`unsafeShiftR` Word8 -> Int
word8ToInt Word8
cl') Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
w

-- | @since 2.0
utf8DecodeContinue :: Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue :: Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue !Word8
w !DecoderState
st (CodePoint !Int
cp)
  | DecoderState
st' DecoderState -> DecoderState -> Bool
forall a. Eq a => a -> a -> Bool
== DecoderState
utf8AcceptState = Char -> DecoderResult
Accept (Int -> Char
chr Int
cp')
  | DecoderState
st' DecoderState -> DecoderState -> Bool
forall a. Eq a => a -> a -> Bool
== DecoderState
utf8RejectState = DecoderResult
Reject
  | Bool
otherwise              = DecoderState -> CodePoint -> DecoderResult
Incomplete DecoderState
st' (Int -> CodePoint
CodePoint Int
cp')
  where
    cl :: ByteClass
cl  = Word8 -> ByteClass
byteToClass Word8
w
    st' :: DecoderState
st' = ByteClass -> DecoderState -> DecoderState
updateState ByteClass
cl DecoderState
st
    cp' :: Int
cp' = (Int
cp Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftL` Int
6) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. Word8 -> Int
word8ToInt (Word8
w Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x3f)