{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE MagicHash                  #-}
{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE TypeFamilies               #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE CPP                        #-}
module Basement.UTF8.Helper
    where
import           Basement.Compat.Base
import           Basement.Compat.Primitive
import           Basement.Types.OffsetSize
import           Basement.UTF8.Types
import           GHC.Prim
import           GHC.Types
import           GHC.Word
maskContinuation# :: Word# -> Word#
maskContinuation# v = and# v 0x3f##
{-# INLINE maskContinuation# #-}
maskHeader2# :: Word# -> Word#
maskHeader2# h = and# h 0x1f##
{-# INLINE maskHeader2# #-}
maskHeader3# :: Word# -> Word#
maskHeader3# h = and# h 0xf##
{-# INLINE maskHeader3# #-}
maskHeader4# :: Word# -> Word#
maskHeader4# h = and# h 0x7##
{-# INLINE maskHeader4# #-}
or3# :: Word# -> Word# -> Word# -> Word#
or3# a b c = or# a (or# b c)
{-# INLINE or3# #-}
or4# :: Word# -> Word# -> Word# -> Word# -> Word#
or4# a b c d = or# (or# a b) (or# c d)
{-# INLINE or4# #-}
toChar# :: Word# -> Char
toChar# w = C# (chr# (word2Int# w))
{-# INLINE toChar# #-}
toChar1 :: StepASCII -> Char
toChar1 (StepASCII (W8# w)) = toChar# w
toChar2 :: StepASCII -> Word8 -> Char
toChar2 (StepASCII (W8# w1)) (W8# w2) =
    toChar# (or# (uncheckedShiftL# (maskHeader2# w1) 6#) (maskContinuation# w2))
toChar3 :: StepASCII -> Word8 -> Word8 -> Char
toChar3 (StepASCII (W8# w1)) (W8# w2) (W8# w3) =
    toChar# (or3# (uncheckedShiftL# (maskHeader3# w1) 12#)
                  (uncheckedShiftL# (maskContinuation# w2) 6#)
                  (maskContinuation# w3)
            )
toChar4 :: StepASCII -> Word8 -> Word8 -> Word8 -> Char
toChar4 (StepASCII (W8# w1)) (W8# w2) (W8# w3) (W8# w4) =
    toChar# (or4# (uncheckedShiftL# (maskHeader4# w1) 18#)
                  (uncheckedShiftL# (maskContinuation# w2) 12#)
                  (uncheckedShiftL# (maskContinuation# w3) 6#)
                  (maskContinuation# w4)
            )
data UTF8Char =
      UTF8_1 {-# UNPACK #-} !Word8
    | UTF8_2 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
    | UTF8_3 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
    | UTF8_4 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
asUTF8Char :: Char -> UTF8Char
asUTF8Char !(C# c)
  | bool# (ltWord# x 0x80##   ) = encode1
  | bool# (ltWord# x 0x800##  ) = encode2
  | bool# (ltWord# x 0x10000##) = encode3
  | otherwise                   = encode4
    where
      !x = int2Word# (ord# c)
      encode1 = UTF8_1 (W8# x)
      encode2 =
          let !x1 = W8# (or# (uncheckedShiftRL# x 6#) 0xc0##)
              !x2 = toContinuation x
           in UTF8_2 x1 x2
      encode3 =
          let !x1 = W8# (or# (uncheckedShiftRL# x 12#) 0xe0##)
              !x2 = toContinuation (uncheckedShiftRL# x 6#)
              !x3 = toContinuation x
           in UTF8_3 x1 x2 x3
      encode4 =
          let !x1 = W8# (or# (uncheckedShiftRL# x 18#) 0xf0##)
              !x2 = toContinuation (uncheckedShiftRL# x 12#)
              !x3 = toContinuation (uncheckedShiftRL# x 6#)
              !x4 = toContinuation x
           in UTF8_4 x1 x2 x3 x4
      toContinuation :: Word# -> Word8
      toContinuation w = W8# (or# (and# w 0x3f##) 0x80##)
      {-# INLINE toContinuation #-}
numBytes :: UTF8Char -> CountOf Word8
numBytes UTF8_1{} = CountOf 1
numBytes UTF8_2{} = CountOf 2
numBytes UTF8_3{} = CountOf 3
numBytes UTF8_4{} = CountOf 4
skipNextHeaderValue :: Word8 -> CountOf Word8
skipNextHeaderValue !x
    | x < 0xC0  = CountOf 1 
    | x < 0xE0  = CountOf 2 
    | x < 0xF0  = CountOf 3 
    | otherwise = CountOf 4
{-# INLINE skipNextHeaderValue #-}
headerIsAscii :: StepASCII -> Bool
headerIsAscii (StepASCII x) = x < 0x80
charToBytes :: Int -> CountOf Word8
charToBytes c
    | c < 0x80     = CountOf 1
    | c < 0x800    = CountOf 2
    | c < 0x10000  = CountOf 3
    | c < 0x110000 = CountOf 4
    | otherwise    = error ("invalid code point: " `mappend` show c)
encodeCharUTF8 :: Char -> CharUTF8
encodeCharUTF8 !(C# c)
    | bool# (ltWord# x 0x80##   ) = CharUTF8 (W32# x)
    | bool# (ltWord# x 0x800##  ) = CharUTF8 encode2
    | bool# (ltWord# x 0x10000##) = CharUTF8 encode3
    | otherwise                   = CharUTF8 encode4
  where
    !x = int2Word# (ord# c)
    
    mask2 = 0x0000bfdf## 
    mask3 = 0x00bfbfef## 
    mask4 = 0xbfbfbff7## 
    
    set2  = 0x000080c0## 
    set3  = 0x008080e0## 
    set4  = 0x808080f0## 
    encode2 = W32# (and# mask2 (or3# set2
                                     (uncheckedShiftRL# x 6#) 
                                     (uncheckedShiftL# x 8# ) 
                               ))
    encode3 = W32# (and# mask3 (or4# set3
                                     (uncheckedShiftRL# x 12#) 
                                     (and# 0x3f00## (uncheckedShiftL# x 2#)) 
                                     (uncheckedShiftL# x 16# ) 
                               ))
    encode4 = W32# (and# mask4 (or4# set4
                                     (uncheckedShiftRL# x 18#) 
                                     (or# (and# 0x3f00## (uncheckedShiftRL# x 4#))   
                                          (and# 0x3f0000## (uncheckedShiftL# x 10#)) 
                                     )
                                     (uncheckedShiftL# x 24# ) 
                               ))
decodeCharUTF8 :: CharUTF8 -> Char
decodeCharUTF8 c@(CharUTF8 !(W32# w))
    | isCharUTF8Case1 c = toChar# w
    | isCharUTF8Case2 c = encode2
    | isCharUTF8Case3 c = encode3
    | otherwise         = encode4
  where
    encode2 =
        toChar# (or# (uncheckedShiftL# (maskHeader2# w) 6#)
                     (maskContinuation# (uncheckedShiftRL# w 8#))
                )
    encode3 =
        toChar# (or3# (uncheckedShiftL# (maskHeader3# w) 12#)
                      (uncheckedShiftRL# (and# 0x3f00## w) 8#)
                      (maskContinuation# (uncheckedShiftRL# w 16#))
                )
    encode4 =
        toChar# (or4# (uncheckedShiftL#  (maskHeader4# w) 18#)
                      (uncheckedShiftRL# (and# 0x3f00## w) 10#)
                      (uncheckedShiftL#  (and# 0x3f0000## w) 4#)
                      (maskContinuation# (uncheckedShiftRL# w 24#))
                )
    
    
    
    
isCharUTF8Case1 :: CharUTF8 -> Bool
isCharUTF8Case1 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x80##) 0##)
{-# INLINE isCharUTF8Case1 #-}
isCharUTF8Case2 :: CharUTF8 -> Bool
isCharUTF8Case2 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x20##) 0##)
{-# INLINE isCharUTF8Case2 #-}
isCharUTF8Case3 :: CharUTF8 -> Bool
isCharUTF8Case3 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x10##) 0##)
{-# INLINE isCharUTF8Case3 #-}
isCharUTF8Case4 :: CharUTF8 -> Bool
isCharUTF8Case4 (CharUTF8 !(W32# w)) = bool# (eqWord# (and# w 0x08##) 0##)
{-# INLINE isCharUTF8Case4 #-}