{-# LANGUAGE CPP, MagicHash, TypeSynonymInstances, FlexibleInstances #-}

{- |
  Module      :  Codec.Binary.UTF8.Light
  Copyright   :  (c) Matt Morrow 2008, Francesco Ariis 2022
  License     :  BSD3
  Maintainer  :  Francesco Ariis <fa-ml@ariis.it>
  Stability   :  provisional
  Portability :  portable

  Lightweight UTF8 handling.
-}

module Codec.Binary.UTF8.Light (
    UTF8(..)
  , lenUTF8
  , lenUTF16
  , countUTF8
  , decodeUTF8
  , encodeUTF8
  , encodeUTF8'
  , withUTF8
  , putUTF8
  , putUTF8Ln
  , hPutUTF8
  , hPutUTF8Ln
  , readUTF8File
  , writeUTF8File
  , appendUTF8File
  , hGetUTF8Line
  , hGetUTF8Contents
  , hGetUTF8
  , hGetUTF8NonBlocking
  , flipUTF8
  , unflipUTF8
  , flipTab
  , unflipTab
  , showHex
  , toBits
  , fromBits
  , Int8,Int16,Int32
  , Word,Word8,Word16,Word32
  , c2w, w2c
) where

import Data.Bits
import Data.List(foldl')
import Data.Char(chr,ord)
import Data.ByteString(ByteString)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Internal as B
import Data.ByteString.Unsafe
import System.IO(Handle)
import Codec.Binary.UTF8.Light.Helper (c2w, w2c, cwrd, wh, toW8)

#if defined(__GLASGOW_HASKELL__)
import GHC.Exts
  (Int(I#))
import GHC.Int (Int8, Int16, Int32)
import GHC.Word
  (Word8, Word16, Word32(W32#))
import GHC.Prim
  (int2Word# ,and#, or#, ltWord#, uncheckedShiftRL#)
#else
import Data.Word
  (Word,Word8,Word16,Word32)
import Data.Int(Int32)
#endif

-- | For convenience
fi :: (Num b, Integral a) => a -> b
fi :: forall b a. (Num b, Integral a) => a -> b
fi = forall a b. (Integral a, Num b) => a -> b
fromIntegral

-- | Instances:
--    @ByteString@, @String@
--    , @[Word32]@, @[Word]@
--    , @[Int32]@, @[Int]@
class UTF8 a where
  encode :: a -> ByteString
  decode :: ByteString -> a

instance UTF8 ByteString where
  encode :: ByteString -> ByteString
encode = forall a. a -> a
id
  decode :: ByteString -> ByteString
decode = forall a. a -> a
id

instance UTF8 [Word32] where
  encode :: [Word32] -> ByteString
encode = [Word32] -> ByteString
encodeUTF8
  decode :: ByteString -> [Word32]
decode = ByteString -> [Word32]
decodeUTF8

instance UTF8 [Word] where
  encode :: [Word] -> ByteString
encode = [Word32] -> ByteString
encodeUTF8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall b a. (Num b, Integral a) => a -> b
fi
  decode :: ByteString -> [Word]
decode = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall b a. (Num b, Integral a) => a -> b
fi forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word32]
decodeUTF8

instance UTF8 [Int32] where
  encode :: [Int32] -> ByteString
encode = [Word32] -> ByteString
encodeUTF8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall b a. (Num b, Integral a) => a -> b
fi
  decode :: ByteString -> [Int32]
decode = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall b a. (Num b, Integral a) => a -> b
fi forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word32]
decodeUTF8

instance UTF8 [Int] where
  encode :: [Int] -> ByteString
encode = [Word32] -> ByteString
encodeUTF8 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall b a. (Num b, Integral a) => a -> b
fi
  decode :: ByteString -> [Int]
decode = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall b a. (Num b, Integral a) => a -> b
fi forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [Word32]
decodeUTF8

instance UTF8 String where
  encode :: [Char] -> ByteString
encode = forall a. UTF8 a => a -> ByteString
encode forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Int
ord
  decode :: ByteString -> [Char]
decode = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Int -> Char
chr forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => ByteString -> a
decode

withUTF8 :: (UTF8 a) => a -> (ByteString -> b) -> b
withUTF8 :: forall a b. UTF8 a => a -> (ByteString -> b) -> b
withUTF8 a
a ByteString -> b
k = ByteString -> b
k (forall a. UTF8 a => a -> ByteString
encode a
a)

putUTF8 :: (UTF8 a) => a -> IO ()
putUTF8 :: forall a. UTF8 a => a -> IO ()
putUTF8 = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. UTF8 a => a -> (ByteString -> b) -> b
withUTF8 ByteString -> IO ()
B.putStr

putUTF8Ln :: (UTF8 a) => a -> IO ()
putUTF8Ln :: forall a. UTF8 a => a -> IO ()
putUTF8Ln = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. UTF8 a => a -> (ByteString -> b) -> b
withUTF8 ByteString -> IO ()
B8.putStrLn

hPutUTF8 :: (UTF8 a) => Handle -> a -> IO ()
hPutUTF8 :: forall a. UTF8 a => Handle -> a -> IO ()
hPutUTF8 Handle
h = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. UTF8 a => a -> (ByteString -> b) -> b
withUTF8 (Handle -> ByteString -> IO ()
B.hPut Handle
h)

hPutUTF8Ln :: (UTF8 a) => Handle -> a -> IO ()
hPutUTF8Ln :: forall a. UTF8 a => Handle -> a -> IO ()
hPutUTF8Ln Handle
h = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. UTF8 a => a -> (ByteString -> b) -> b
withUTF8 (Handle -> ByteString -> IO ()
B8.hPutStrLn Handle
h)

readUTF8File :: (UTF8 a) => FilePath -> IO a
readUTF8File :: forall a. UTF8 a => [Char] -> IO a
readUTF8File = (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => ByteString -> a
decode forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<<) forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO ByteString
B.readFile

writeUTF8File :: (UTF8 a) => FilePath -> a -> IO ()
writeUTF8File :: forall a. UTF8 a => [Char] -> a -> IO ()
writeUTF8File [Char]
p = [Char] -> ByteString -> IO ()
B.writeFile [Char]
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => a -> ByteString
encode

appendUTF8File :: (UTF8 a) => FilePath -> a -> IO ()
appendUTF8File :: forall a. UTF8 a => [Char] -> a -> IO ()
appendUTF8File [Char]
p = [Char] -> ByteString -> IO ()
B.appendFile [Char]
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => a -> ByteString
encode

hGetUTF8Line :: (UTF8 a) => Handle -> IO a
hGetUTF8Line :: forall a. UTF8 a => Handle -> IO a
hGetUTF8Line = (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => ByteString -> a
decode forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<<) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Handle -> IO ByteString
B.hGetLine

hGetUTF8Contents :: (UTF8 a) => Handle -> IO a
hGetUTF8Contents :: forall a. UTF8 a => Handle -> IO a
hGetUTF8Contents = (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => ByteString -> a
decode forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<<) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Handle -> IO ByteString
B.hGetContents

-- | Be careful that you're sure you're not
--  chopping a UTF8 char in two!
hGetUTF8 :: (UTF8 a) => Handle -> Int -> IO a
hGetUTF8 :: forall a. UTF8 a => Handle -> Int -> IO a
hGetUTF8 Handle
h = (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => ByteString -> a
decode forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<<) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Handle -> Int -> IO ByteString
B.hGet Handle
h

-- | Same warning as for @hGetUTF8@
hGetUTF8NonBlocking :: (UTF8 a) => Handle -> Int -> IO a
hGetUTF8NonBlocking :: forall a. UTF8 a => Handle -> Int -> IO a
hGetUTF8NonBlocking Handle
h = (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => ByteString -> a
decode forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<<) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Handle -> Int -> IO ByteString
B.hGetNonBlocking Handle
h

-- | Length in Word8s
lenUTF8 :: Word8 -> Int
{-# INLINE lenUTF8 #-}
lenUTF8 :: Word8 -> Int
lenUTF8 Word8
w8
  | Word8
w8 forall a. Ord a => a -> a -> Bool
< Word8
0x80 = Int
1
  | Word8
w8 forall a. Ord a => a -> a -> Bool
< Word8
0xe0 = Int
2
  | Word8
w8 forall a. Ord a => a -> a -> Bool
< Word8
0xf0 = Int
3
  | Word8
w8 forall a. Ord a => a -> a -> Bool
< Word8
0xf8 = Int
4
  | Bool
otherwise = Int
0

-- | Length in Word16s
lenUTF16 :: Word16 -> Int
lenUTF16 :: Word16 -> Int
lenUTF16 Word16
w16
-- I'm sure this could be
-- made more efficient
  | Word16
w16forall a. Bits a => a -> Int -> a
`shiftR`Int
10forall a. Eq a => a -> a -> Bool
==Word16
0x36 = Int
2
  | Word16
w16forall a. Bits a => a -> Int -> a
`shiftR`Int
10forall a. Eq a => a -> a -> Bool
==Word16
0x37 = Int
0
  | Bool
otherwise           = Int
1

-- | Lengths in Word8s
countUTF8 :: ByteString -> [Int]
countUTF8 :: ByteString -> [Int]
countUTF8 ByteString
bs = Int -> Int -> ByteString -> [Int]
go Int
0 (ByteString -> Int
B.length ByteString
bs) ByteString
bs
  where go :: Int -> Int -> ByteString -> [Int]
        go :: Int -> Int -> ByteString -> [Int]
go Int
i Int
len ByteString
s | Int
len forall a. Ord a => a -> a -> Bool
<= Int
i = []
          | Bool
otherwise = case Word8 -> Int
lenUTF8 (ByteString -> Int -> Word8
unsafeIndex ByteString
s Int
i)
                          of  Int
0 -> []
                              Int
n -> Int
n forall a. a -> [a] -> [a]
: Int -> Int -> ByteString -> [Int]
go (Int
iforall a. Num a => a -> a -> a
+Int
n) Int
len ByteString
s

encodeUTF8 :: [Word32] -> ByteString
encodeUTF8 :: [Word32] -> ByteString
encodeUTF8 = [Word8] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Word32] -> [[Word8]]
encodeUTF8'

#if !defined(__GLASGOW_HASKELL__)

-- | Word32s not representing
--  valid UTF8 chars are /dropped/.
encodeUTF8' :: [Word32] -> [[Word8]]
encodeUTF8' [] = []
encodeUTF8' (x:xs)
  | x < 0x80 =
      [fi x] : encodeUTF8' xs
  | x < 0x800 =
      [ fi(x`shiftR`6.|.0xc0)
      , fi(x.&.0x3f.|.0x80)
      ] : encodeUTF8' xs
  | x < 0xf0000 =
      [ fi(x`shiftR`12.|.0xe0)
      , fi(x`shiftR`6.&.0x3f.|.0x80)
      , fi(x.&.0x3f.|.0x80)
      ] : encodeUTF8' xs
  | x < 0xe00000 =
      [ fi(x`shiftR`18.|.0xf0)
      , fi(x`shiftR`12.&.0x3f.|.0x80)
      , fi(x`shiftR`6.&.0x3f.|.0x80)
      , fi(x.&.0x3f.|.0x80)
      ] : encodeUTF8' xs
  | otherwise = [] : encodeUTF8' xs

decodeUTF8 :: ByteString -> [Word32]
decodeUTF8 s = go 0 (B.length s) s
  where go :: Int -> Int -> ByteString -> [Word32]
        go i len s | len <= i  = []
          | otherwise = let c1 = unsafeIndex s i
                        in case lenUTF8 c1 of
                            0 -> []
                            1 -> fi c1 : go (i+1) len s
                            2 -> if len <= i+1 then [] else
                                  let c2 = unsafeIndex s (i+1)
                                  in fi(c1.&.0x1f)`shiftL`6
                                        `xor`fi(c2.&.0x3f)
                                          : go (i+2) len s
                            3 -> if len <= i+2 then [] else
                                  let c2 = unsafeIndex s (i+1)
                                      c3 = unsafeIndex s (i+2)
                                  in fi(c1.&.0x1f)`shiftL`12
                                      `xor`fi(c2.&.0x3f)`shiftL`6
                                        `xor`fi(c3.&.0x3f)
                                          : go (i+3) len s
                            4 -> if len <= i+3 then [] else
                                  let c2 = unsafeIndex s (i+1)
                                      c3 = unsafeIndex s (i+2)
                                      c4 = unsafeIndex s (i+3)
                                  in fi(c1.&.0x1f)`shiftL`18
                                      `xor`fi(c2.&.0x3f)`shiftL`12
                                        `xor`fi(c3.&.0x3f)`shiftL`6
                                          `xor`fi(c4.&.0x3f)
                                            : go (i+4) len s

#else


-- | Word32s not representing
--  valid UTF8 chars are /dropped/.
encodeUTF8' :: [Word32] -> [[Word8]]
encodeUTF8' :: [Word32] -> [[Word8]]
encodeUTF8' [] = []
-- with ghc-6.10, we
-- can use Word# literalls
-- ==> 0xff00ff00##
encodeUTF8' ((W32# Word32#
w):[Word32]
xs)
#if MIN_VERSION_base(4,7,0)
  | Int# -> Int
I# (Word#
wwWord# -> Word# -> Int#
`ltWord#`(Int# -> Word#
int2Word# Int#
0x80#)) forall a. Eq a => a -> a -> Bool
/= Int
0 =
#else
  | w`ltWord#`(int2Word# 0x80#) =
#endif
      [Word32# -> Word8
wh Word32#
w] forall a. a -> [a] -> [a]
: [Word32] -> [[Word8]]
encodeUTF8' [Word32]
xs

#if MIN_VERSION_base(4,7,0)
  | Int# -> Int
I# (Word#
wwWord# -> Word# -> Int#
`ltWord#`(Int# -> Word#
int2Word# Int#
0x800#)) forall a. Eq a => a -> a -> Bool
/= Int
0 =
#else
  | w`ltWord#`(int2Word# 0x800#) =
#endif
      [ Word# -> Word8
toW8(Word#
wwWord# -> Int# -> Word#
`uncheckedShiftRL#`Int#
6#
              Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0xc0#))
      , Word# -> Word8
toW8(Word#
wwWord# -> Word# -> Word#
`and#`(Int# -> Word#
int2Word# Int#
0x3f#)
              Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0x80#))
      ] forall a. a -> [a] -> [a]
: [Word32] -> [[Word8]]
encodeUTF8' [Word32]
xs
#if MIN_VERSION_base(4,7,0)
  | Int# -> Int
I# (Word#
wwWord# -> Word# -> Int#
`ltWord#`(Int# -> Word#
int2Word# Int#
0xf0000#)) forall a. Eq a => a -> a -> Bool
/= Int
0 =
#else
  | w`ltWord#`(int2Word# 0xf0000#) =
#endif
      [ Word# -> Word8
toW8(Word#
wwWord# -> Int# -> Word#
`uncheckedShiftRL#`Int#
12#
              Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0xe0#))
      , Word# -> Word8
toW8(Word#
wwWord# -> Int# -> Word#
`uncheckedShiftRL#`Int#
6#
              Word# -> Word# -> Word#
`and#`(Int# -> Word#
int2Word# Int#
0x3f#)
                Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0x80#))
      , Word# -> Word8
toW8(Word#
wwWord# -> Word# -> Word#
`and#`(Int# -> Word#
int2Word# Int#
0x3f#)
              Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0x80#))
      ] forall a. a -> [a] -> [a]
: [Word32] -> [[Word8]]
encodeUTF8' [Word32]
xs
#if MIN_VERSION_base(4,7,0)
  | Int# -> Int
I# (Word#
wwWord# -> Word# -> Int#
`ltWord#`(Int# -> Word#
int2Word# Int#
0xe00000#)) forall a. Eq a => a -> a -> Bool
/= Int
0 =
#else
  | w`ltWord#`(int2Word# 0xe00000#) =
#endif
      [ Word# -> Word8
toW8(Word#
wwWord# -> Int# -> Word#
`uncheckedShiftRL#`Int#
18#
              Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0xf0#))
      , Word# -> Word8
toW8(Word#
wwWord# -> Int# -> Word#
`uncheckedShiftRL#`Int#
12#
              Word# -> Word# -> Word#
`and#`(Int# -> Word#
int2Word# Int#
0x3f#)
                Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0x80#))
      , Word# -> Word8
toW8(Word#
wwWord# -> Int# -> Word#
`uncheckedShiftRL#`Int#
6#
              Word# -> Word# -> Word#
`and#`(Int# -> Word#
int2Word# Int#
0x3f#)
                Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0x80#))
      , Word# -> Word8
toW8(Word#
wwWord# -> Word# -> Word#
`and#`(Int# -> Word#
int2Word# Int#
0x3f#)
              Word# -> Word# -> Word#
`or#`(Int# -> Word#
int2Word# Int#
0x80#))
      ] forall a. a -> [a] -> [a]
: [Word32] -> [[Word8]]
encodeUTF8' [Word32]
xs
  | Bool
otherwise = [] forall a. a -> [a] -> [a]
: [Word32] -> [[Word8]]
encodeUTF8' [Word32]
xs

    where
          ww :: Word#
ww = Word32# -> Word#
cwrd Word32#
w

-- TODO: ghc-ify decodeUTF8
decodeUTF8 :: ByteString -> [Word32]
decodeUTF8 :: ByteString -> [Word32]
decodeUTF8 ByteString
bs = Int -> Int -> ByteString -> [Word32]
go Int
0 (ByteString -> Int
B.length ByteString
bs) ByteString
bs
  where go :: Int -> Int -> ByteString -> [Word32]
        go :: Int -> Int -> ByteString -> [Word32]
go Int
i Int
len ByteString
s | Int
len forall a. Ord a => a -> a -> Bool
<= Int
i  = []
          | Bool
otherwise = let c1 :: Word8
c1 = ByteString -> Int -> Word8
unsafeIndex ByteString
s Int
i
                        in case Word8 -> Int
lenUTF8 Word8
c1 of
                            Int
0 -> []
                            Int
1 -> forall b a. (Num b, Integral a) => a -> b
fi Word8
c1 forall a. a -> [a] -> [a]
: Int -> Int -> ByteString -> [Word32]
go (Int
iforall a. Num a => a -> a -> a
+Int
1) Int
len ByteString
s
                            Int
2 -> if Int
len forall a. Ord a => a -> a -> Bool
<= Int
iforall a. Num a => a -> a -> a
+Int
1 then [] else
                                  let c2 :: Word8
c2 = ByteString -> Int -> Word8
unsafeIndex ByteString
s (Int
iforall a. Num a => a -> a -> a
+Int
1)
                                  in forall b a. (Num b, Integral a) => a -> b
fi(Word8
c1forall a. Bits a => a -> a -> a
.&.Word8
0x1f)forall a. Bits a => a -> Int -> a
`shiftL`Int
6
                                        forall a. Bits a => a -> a -> a
`xor`forall b a. (Num b, Integral a) => a -> b
fi(Word8
c2forall a. Bits a => a -> a -> a
.&.Word8
0x3f)
                                          forall a. a -> [a] -> [a]
: Int -> Int -> ByteString -> [Word32]
go (Int
iforall a. Num a => a -> a -> a
+Int
2) Int
len ByteString
s
                            Int
3 -> if Int
len forall a. Ord a => a -> a -> Bool
<= Int
iforall a. Num a => a -> a -> a
+Int
2 then [] else
                                  let c2 :: Word8
c2 = ByteString -> Int -> Word8
unsafeIndex ByteString
s (Int
iforall a. Num a => a -> a -> a
+Int
1)
                                      c3 :: Word8
c3 = ByteString -> Int -> Word8
unsafeIndex ByteString
s (Int
iforall a. Num a => a -> a -> a
+Int
2)
                                  in forall b a. (Num b, Integral a) => a -> b
fi(Word8
c1forall a. Bits a => a -> a -> a
.&.Word8
0x1f)forall a. Bits a => a -> Int -> a
`shiftL`Int
12
                                      forall a. Bits a => a -> a -> a
`xor`forall b a. (Num b, Integral a) => a -> b
fi(Word8
c2forall a. Bits a => a -> a -> a
.&.Word8
0x3f)forall a. Bits a => a -> Int -> a
`shiftL`Int
6
                                        forall a. Bits a => a -> a -> a
`xor`forall b a. (Num b, Integral a) => a -> b
fi(Word8
c3forall a. Bits a => a -> a -> a
.&.Word8
0x3f)
                                          forall a. a -> [a] -> [a]
: Int -> Int -> ByteString -> [Word32]
go (Int
iforall a. Num a => a -> a -> a
+Int
3) Int
len ByteString
s
                            Int
4 -> if Int
len forall a. Ord a => a -> a -> Bool
<= Int
iforall a. Num a => a -> a -> a
+Int
3 then [] else
                                  let c2 :: Word8
c2 = ByteString -> Int -> Word8
unsafeIndex ByteString
s (Int
iforall a. Num a => a -> a -> a
+Int
1)
                                      c3 :: Word8
c3 = ByteString -> Int -> Word8
unsafeIndex ByteString
s (Int
iforall a. Num a => a -> a -> a
+Int
2)
                                      c4 :: Word8
c4 = ByteString -> Int -> Word8
unsafeIndex ByteString
s (Int
iforall a. Num a => a -> a -> a
+Int
3)
                                  in forall b a. (Num b, Integral a) => a -> b
fi(Word8
c1forall a. Bits a => a -> a -> a
.&.Word8
0x1f)forall a. Bits a => a -> Int -> a
`shiftL`Int
18
                                      forall a. Bits a => a -> a -> a
`xor`forall b a. (Num b, Integral a) => a -> b
fi(Word8
c2forall a. Bits a => a -> a -> a
.&.Word8
0x3f)forall a. Bits a => a -> Int -> a
`shiftL`Int
12
                                        forall a. Bits a => a -> a -> a
`xor`forall b a. (Num b, Integral a) => a -> b
fi(Word8
c3forall a. Bits a => a -> a -> a
.&.Word8
0x3f)forall a. Bits a => a -> Int -> a
`shiftL`Int
6
                                          forall a. Bits a => a -> a -> a
`xor`forall b a. (Num b, Integral a) => a -> b
fi(Word8
c4forall a. Bits a => a -> a -> a
.&.Word8
0x3f)
                                            forall a. a -> [a] -> [a]
: Int -> Int -> ByteString -> [Word32]
go (Int
iforall a. Num a => a -> a -> a
+Int
4) Int
len ByteString
s
                            Int
_ -> forall a. HasCallStack => [Char] -> a
error [Char]
"decodeUTF8: len > 4"

#endif


-----------------------------------------------------------------------------

-- misc debug stuff

toBits :: Word8 -> [Word8]
toBits :: Word8 -> [Word8]
toBits Word8
w8 = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((forall a. Bits a => a -> a -> a
.&.Word8
0x01) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Word8
w8forall a. Bits a => a -> Int -> a
`shiftR`)) [Int
7,Int
6,Int
5,Int
4,Int
3,Int
2,Int
1,Int
0]

fromBits :: [Word8] -> Word8
fromBits :: [Word8] -> Word8
fromBits = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\Word8
a (Int
n,Word8
b) -> Word8
aforall a. Bits a => a -> a -> a
.|.Word8
bforall a. Bits a => a -> Int -> a
`shiftL`Int
n) Word8
0
            forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..Int
7] forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse

hexTab :: ByteString
hexTab :: ByteString
hexTab = [Word8] -> ByteString
B.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Char -> Word8
B.c2w forall a b. (a -> b) -> a -> b
$
  [Char]
"0123456789abcdef"

showHex :: Int -> String
showHex :: Int -> [Char]
showHex Int
i = ([Char]
"0x"forall a. [a] -> [a] -> [a]
++)
  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [Int
28,Int
24,Int
20,Int
16,Int
12,Int
8,Int
4,Int
0] forall a b. (a -> b) -> a -> b
$ \Int
n ->
    Word8 -> Char
B.w2c (ByteString -> Int -> Word8
unsafeIndex ByteString
hexTab (Int
iforall a. Bits a => a -> Int -> a
`shiftR`Int
nforall a. Bits a => a -> a -> a
.&.Int
0xf))

-----------------------------------------------------------------------------

-- now, for fun...

{- |
> ghci> putUTF8Ln $ flipUTF8 "[?np_bs!]"
> [¡sq‾bu¿]
-}
flipUTF8 :: (UTF8 a) => a -> a
flipUTF8 :: forall a. UTF8 a => a -> a
flipUTF8 = forall a. UTF8 a => ByteString -> a
decode forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, Int)] -> ByteString -> ByteString
flipString [(Int, Int)]
flipTab forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => a -> ByteString
encode

{- |
> ghci> putUTF8Ln $ (unflipUTF8 . flipUTF8) "[?np_bs!]"
> [?np_bs!]
-}
unflipUTF8 :: (UTF8 a) => a -> a
unflipUTF8 :: forall a. UTF8 a => a -> a
unflipUTF8 = forall a. UTF8 a => ByteString -> a
decode forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, Int)] -> ByteString -> ByteString
flipString [(Int, Int)]
unflipTab forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => a -> ByteString
encode

-- | Omits chars it doesn't know how to flip. Possibly
--  it's more desirable to just be id on such chars?
flipString :: [(Int,Int)] -> ByteString -> ByteString
flipString :: [(Int, Int)] -> ByteString -> ByteString
flipString [(Int, Int)]
tab = forall a. UTF8 a => a -> ByteString
encode
                  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse
                    forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall b a. b -> (a -> b) -> Maybe a -> b
maybe Char
' ' Int -> Char
chr
                              forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup [(Int, Int)]
tab)
                      forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. UTF8 a => ByteString -> a
decode

unflipTab :: [(Int,Int)]
unflipTab :: [(Int, Int)]
unflipTab = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry(forall a b c. (a -> b -> c) -> b -> a -> c
flip(,))) [(Int, Int)]
flipTab

flipTab :: [(Int,Int)]
flipTab :: [(Int, Int)]
flipTab = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Char
a,Int
b)->(Char -> Int
ord Char
a,Int
b))
  [(Char
'a', Int
0x250)
  ,(Char
'b', Char -> Int
ord Char
'q')
  ,(Char
'c', Int
0x254)
  ,(Char
'd', Char -> Int
ord Char
'p')
  ,(Char
'e', Int
0x1dd)
  ,(Char
'f', Int
0x25f)
  ,(Char
'g', Int
0x183)
  ,(Char
'h', Int
0x265)
  ,(Char
'i', Int
0x131)
  ,(Char
'j', Int
0x27e)
  ,(Char
'k', Int
0x29e)
  ,(Char
'l', Char -> Int
ord Char
'l')
  ,(Char
'm', Int
0x26f)
  ,(Char
'n', Char -> Int
ord Char
'u')
  ,(Char
'o', Char -> Int
ord Char
'o')
  ,(Char
'p', Char -> Int
ord Char
'b')
  ,(Char
'q', Char -> Int
ord Char
'd')
  ,(Char
'r', Int
0x279)
  ,(Char
's', Char -> Int
ord Char
's')
  ,(Char
't', Int
0x287)
  ,(Char
'u', Char -> Int
ord Char
'n')
  ,(Char
'v', Int
0x28c)
  ,(Char
'w', Int
0x28d)
  ,(Char
'x', Char -> Int
ord Char
'x')
  ,(Char
'y', Int
0x28e)
  ,(Char
'z', Char -> Int
ord Char
'z')
  ,(Char
'.', Int
0x2d9)
  ,(Char
'[', Char -> Int
ord Char
']')
  ,(Char
']', Char -> Int
ord Char
'[')
  ,(Char
'{', Char -> Int
ord Char
'}')
  ,(Char
'}', Char -> Int
ord Char
'{')
  ,(Char
'<', Char -> Int
ord Char
'>')
  ,(Char
'>', Char -> Int
ord Char
'<')
  ,(Char
'?', Int
0xbf)
  ,(Char
'!', Int
0xa1)
  ,(Char
'\'', Char -> Int
ord Char
',')
  ,(Char
'_', Int
0x203e)
  ,(Char
';', Int
0x061b)
  ]

{-
ghci> mapM_ print . zip (fmap show [0..9] ++ fmap (:[]) ['a'..'f']) . fmap (drop 4 . toBits) $ [0..15]
("0",[0,0,0,0])
("1",[0,0,0,1])
("2",[0,0,1,0])
("3",[0,0,1,1])
("4",[0,1,0,0])
("5",[0,1,0,1])
("6",[0,1,1,0])
("7",[0,1,1,1])
("8",[1,0,0,0])
("9",[1,0,0,1])
("a",[1,0,1,0])
("b",[1,0,1,1])
("c",[1,1,0,0])
("d",[1,1,0,1])
("e",[1,1,1,0])
("f",[1,1,1,1])

class (Num a) => Bits a where
  (.&.) :: a -> a -> a
  (.|.) :: a -> a -> a
  xor :: a -> a -> a
  complement :: a -> a
  shift :: a -> Int -> a
  rotate :: a -> Int -> a
  bit :: Int -> a
  setBit :: a -> Int -> a
  clearBit :: a -> Int -> a
  complementBit :: a -> Int -> a
  testBit :: a -> Int -> Bool
  bitSize :: a -> Int
  isSigned :: a -> Bool
  shiftL :: a -> Int -> a
  shiftR :: a -> Int -> a
  rotateL :: a -> Int -> a
  rotateR :: a -> Int -> a

uncheckedIShiftL#   :: Int# -> Int# -> Int#
uncheckedIShiftRA#  :: Int# -> Int# -> Int#
uncheckedIShiftRL#  :: Int# -> Int# -> Int#
uncheckedShiftL#    :: Word# -> Int# -> Word#
uncheckedShiftRL#   :: Word# -> Int# -> Word#
-}



{-
data Char#
gtChar# :: Char# -> Char# -> Bool
geChar# :: Char# -> Char# -> Bool
eqChar# :: Char# -> Char# -> Bool
neChar# :: Char# -> Char# -> Bool
ltChar# :: Char# -> Char# -> Bool
leChar# :: Char# -> Char# -> Bool
ord# :: Char# -> Int#

data Int#
(+#) :: Int# -> Int# -> Int#
(-#) :: Int# -> Int# -> Int#
(*#) :: Int# -> Int# -> Int#
(>#) :: Int# -> Int# -> Bool
(>=#) :: Int# -> Int# -> Bool
(==#) :: Int# -> Int# -> Bool
(/=#) :: Int# -> Int# -> Bool
(<#) :: Int# -> Int# -> Bool
(<=#) :: Int# -> Int# -> Bool
chr# :: Int# -> Char#
int2Word# :: Int# -> Word#
uncheckedIShiftL# :: Int# -> Int# -> Int#
uncheckedIShiftRA# :: Int# -> Int# -> Int#
uncheckedIShiftRL# :: Int# -> Int# -> Int#

data Word#
plusWord# :: Word# -> Word# -> Word#
minusWord# :: Word# -> Word# -> Word#
timesWord# :: Word# -> Word# -> Word#
and# :: Word# -> Word# -> Word#
or# :: Word# -> Word# -> Word#
xor# :: Word# -> Word# -> Word#
not# :: Word# -> Word#
uncheckedShiftL# :: Word# -> Int# -> Word#
uncheckedShiftRL# :: Word# -> Int# -> Word#
word2Int# :: Word# -> Int#
gtWord# :: Word# -> Word# -> Bool
geWord# :: Word# -> Word# -> Bool
eqWord# :: Word# -> Word# -> Bool
neWord# :: Word# -> Word# -> Bool
ltWord# :: Word# -> Word# -> Bool
leWord# :: Word# -> Word# -> Bool
narrow8Int# :: Int# -> Int#
narrow16Int# :: Int# -> Int#
narrow32Int# :: Int# -> Int#
narrow8Word# :: Word# -> Word#
narrow16Word# :: Word# -> Word#
narrow32Word# :: Word# -> Word#

data MutByteArr# s
newByteArray# :: Int# -> State# s -> (#State# s, MutByteArr# s#)
newPinnedByteArray# :: Int# -> State# s -> (#State# s, MutByteArr# s#)
byteArrayContents# :: ByteArr# -> Addr#
sameMutableByteArray# :: MutByteArr# s -> MutByteArr# s -> Bool
unsafeFreezeByteArray# :: MutByteArr# s -> State# s -> (#State# s, ByteArr##)
sizeofByteArray# :: ByteArr# -> Int#
sizeofMutableByteArray# :: MutByteArr# s -> Int#
indexCharArray# :: ByteArr# -> Int# -> Char#
indexWideCharArray# :: ByteArr# -> Int# -> Char#
indexIntArray# :: ByteArr# -> Int# -> Int#
indexWordArray# :: ByteArr# -> Int# -> Word#
indexAddrArray# :: ByteArr# -> Int# -> Addr#
indexInt8Array# :: ByteArr# -> Int# -> Int#
indexInt16Array# :: ByteArr# -> Int# -> Int#
indexInt32Array# :: ByteArr# -> Int# -> Int#
indexInt64Array# :: ByteArr# -> Int# -> Int#
indexWord8Array# :: ByteArr# -> Int# -> Word#
indexWord16Array# :: ByteArr# -> Int# -> Word#
indexWord32Array# :: ByteArr# -> Int# -> Word#
indexWord64Array# :: ByteArr# -> Int# -> Word#
readCharArray# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Char##)
readWideCharArray# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Char##)
readIntArray# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Int##)
readWordArray# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Word##)
readAddrArray# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Addr##)
readInt8Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Int##)
readInt16Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Int##)
readInt32Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Int##)
readInt64Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Int##)
readWord8Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Word##)
readWord16Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Word##)
readWord32Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Word##)
readWord64Array# :: MutByteArr# s -> Int# -> State# s -> (#State# s, Word##)
writeCharArray# :: MutByteArr# s -> Int# -> Char# -> State# s -> State# s
writeWideCharArray# :: MutByteArr# s -> Int# -> Char# -> State# s -> State# s
writeIntArray# :: MutByteArr# s -> Int# -> Int# -> State# s -> State# s
writeWordArray# :: MutByteArr# s -> Int# -> Word# -> State# s -> State# s
writeAddrArray# :: MutByteArr# s -> Int# -> Addr# -> State# s -> State# s
writeInt8Array# :: MutByteArr# s -> Int# -> Int# -> State# s -> State# s
writeInt16Array# :: MutByteArr# s -> Int# -> Int# -> State# s -> State# s
writeInt32Array# :: MutByteArr# s -> Int# -> Int# -> State# s -> State# s
writeInt64Array# :: MutByteArr# s -> Int# -> Int# -> State# s -> State# s
writeWord8Array# :: MutByteArr# s -> Int# -> Word# -> State# s -> State# s
writeWord16Array# :: MutByteArr# s -> Int# -> Word# -> State# s -> State# s
writeWord32Array# :: MutByteArr# s -> Int# -> Word# -> State# s -> State# s
writeWord64Array# :: MutByteArr# s -> Int# -> Word# -> State# s -> State# s

data Addr#
nullAddr# :: Addr#
plusAddr# :: Addr# -> Int# -> Addr#
minusAddr# :: Addr# -> Addr# -> Int#
remAddr# :: Addr# -> Int# -> Int#
addr2Int# :: Addr# -> Int#
int2Addr# :: Int# -> Addr#
gtAddr# :: Addr# -> Addr# -> Bool
geAddr# :: Addr# -> Addr# -> Bool
eqAddr# :: Addr# -> Addr# -> Bool
neAddr# :: Addr# -> Addr# -> Bool
ltAddr# :: Addr# -> Addr# -> Bool
leAddr# :: Addr# -> Addr# -> Bool
indexCharOffAddr# :: Addr# -> Int# -> Char#
indexWideCharOffAddr# :: Addr# -> Int# -> Char#
indexIntOffAddr# :: Addr# -> Int# -> Int#
indexWordOffAddr# :: Addr# -> Int# -> Word#
indexAddrOffAddr# :: Addr# -> Int# -> Addr#
indexInt8OffAddr# :: Addr# -> Int# -> Int#
indexInt16OffAddr# :: Addr# -> Int# -> Int#
indexInt32OffAddr# :: Addr# -> Int# -> Int#
indexInt64OffAddr# :: Addr# -> Int# -> Int#
indexWord8OffAddr# :: Addr# -> Int# -> Word#
indexWord16OffAddr# :: Addr# -> Int# -> Word#
indexWord32OffAddr# :: Addr# -> Int# -> Word#
indexWord64OffAddr# :: Addr# -> Int# -> Word#
readCharOffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Char##)
readWideCharOffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Char##)
readIntOffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Int##)
readWordOffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Word##)
readAddrOffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Addr##)
readInt8OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Int##)
readInt16OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Int##)
readInt32OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Int##)
readInt64OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Int##)
readWord8OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Word##)
readWord16OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Word##)
readWord32OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Word##)
readWord64OffAddr# :: Addr# -> Int# -> State# s -> (#State# s, Word##)
writeCharOffAddr# :: Addr# -> Int# -> Char# -> State# s -> State# s
writeWideCharOffAddr# :: Addr# -> Int# -> Char# -> State# s -> State# s
writeIntOffAddr# :: Addr# -> Int# -> Int# -> State# s -> State# s
writeWordOffAddr# :: Addr# -> Int# -> Word# -> State# s -> State# s
writeAddrOffAddr# :: Addr# -> Int# -> Addr# -> State# s -> State# s
writeInt8OffAddr# :: Addr# -> Int# -> Int# -> State# s -> State# s
writeInt16OffAddr# :: Addr# -> Int# -> Int# -> State# s -> State# s
writeInt32OffAddr# :: Addr# -> Int# -> Int# -> State# s -> State# s
writeInt64OffAddr# :: Addr# -> Int# -> Int# -> State# s -> State# s
writeWord8OffAddr# :: Addr# -> Int# -> Word# -> State# s -> State# s
writeWord16OffAddr# :: Addr# -> Int# -> Word# -> State# s -> State# s
writeWord32OffAddr# :: Addr# -> Int# -> Word# -> State# s -> State# s
writeWord64OffAddr# :: Addr# -> Int# -> Word# -> State# s -> State# s

data State# s
data RealWorld

dataToTag# :: a -> Int#
tagToEnum# :: Int# -> a

reallyUnsafePtrEquality# :: a -> a -> Int#

data BCO#
addrToHValue# :: Addr# -> (#a#)
mkApUpd0# :: BCO# -> (#a#)
newBCO# :: ByteArr# -> ByteArr# -> Array# a -> Int# -> ByteArr# -> State# s -> (#State# s, BCO##)
unpackClosure# :: a -> (#Addr#, Array# b, ByteArr##)
getApStackVal# :: a -> Int# -> (#Int#, b#)
seq :: a -> b -> b
inline :: a -> a
lazy :: a -> a

data Any a
unsafeCoerce# :: a -> b



--------GHC.Exts

data Int = I# Int#
data Word = W# Word#

data Char = C# Char#
data Ptr a = Ptr Addr#
data FunPtr a = FunPtr Addr#

shiftL# :: Word# -> Int# -> Word#
shiftRL# :: Word# -> Int# -> Word#
iShiftL# :: Int# -> Int# -> Int#
iShiftRA# :: Int# -> Int# -> Int#
iShiftRL# :: Int# -> Int# -> Int#
-}