-- |
-- Module      : Auxiliary
-- License     : BSD-3-Clause
-- Copyright   : (c) 2025 Olivier Chéron
--
-- ML-KEM auxiliary functions
--
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}
module Auxiliary
    ( Zq, Rq, Tq, (..+), (..-)
    , ntt, nttInv, rcompress, rdecompress
    , byteEncode, byteDecode, byteEncode12, byteDecode12
    , byteEncode1, byteDecode1, sampleNTT, samplePolyCBD
#ifdef ML_KEM_TESTING
    , compress, decompress
    , bitRev7, fromZq, toZq, fromCoeffs, toCoeffs
#endif
    ) where

import Basement.NormalForm
import Basement.PrimType
import Basement.Types.OffsetSize

import Crypto.Hash.Algorithms

import Data.ByteArray (ByteArrayAccess, Bytes, View)
import qualified Data.ByteArray as B

import Control.DeepSeq (NFData(..))
import Control.Monad
import Control.Monad.ST

import Data.Bits
import Data.Proxy
import Data.Word

import GHC.TypeNats

import Foreign.Ptr (Ptr, plusPtr)
import Foreign.Storable (pokeByteOff)

import Unsafe.Coerce

import Block (blockIndex)
import BlockN (BlockN, MutableBlockN)
import Builder (Builder)
import Crypto (BlockDigest)
import Machine
import Marking (Classified, SecurityMarking(..), Leak(..))
import SecureBlock (SecureBlock)
import SecureBytes (SecureBytes)
import qualified BlockN
import qualified Builder
import qualified ByteArrayST as ST
import qualified Crypto
import Math

type N = 256

n :: Int
n = 256

q :: Integer
q = 3329

q16 :: Word16
q16 = fromInteger q

q32 :: Word32
q32 = fromInteger q

q64 :: Word64
q64 = fromInteger q

bitRev7 :: Word8 -> Word8
bitRev7 b =
    (b `unsafeShiftR` 6 .&. 1) .|.
    (b `unsafeShiftR` 5 .&. 1) `unsafeShiftL` 1 .|.
    (b `unsafeShiftR` 4 .&. 1) `unsafeShiftL` 2 .|.
    (b `unsafeShiftR` 3 .&. 1) `unsafeShiftL` 3 .|.
    (b `unsafeShiftR` 2 .&. 1) `unsafeShiftL` 4 .|.
    (b `unsafeShiftR` 1 .&. 1) `unsafeShiftL` 5 .|.
    (b .&. 1) `unsafeShiftL` 6

-- Reduction 𝑥 mod 𝑞 for 0 ≤ 𝑥 < 2𝑞
reduceSimple :: Word16 -> Word16
reduceSimple x = (mask .&. x) .|. (complement mask .&. subtracted)
  where
    subtracted = x - q16
    mask = negate (subtracted `unsafeShiftR` 15)
{-# INLINE reduceSimple #-}

-- Reduction 𝑥 mod 𝑞 for 0 ≤ 𝑥 < 2𝑞² + 𝑞
reduce :: Word32 -> Word16
reduce x = reduceSimple (fromIntegral remainder)
  where
    p = fromIntegral x * ((1 `unsafeShiftL` 24) `div` q64)
    quotient = fromIntegral (p `unsafeShiftR` 24)
    remainder = x - quotient * q32
{-# INLINE reduce #-}

newtype Zq = Zq Word16
#ifdef ML_KEM_TESTING
    deriving (Eq, Show)
#else
    deriving Eq
#endif

instance PrimType Zq where
    type PrimSize Zq = 2
    primSizeInBytes _ = primSizeInBytes (Proxy :: Proxy Word16)
    {-# INLINE primSizeInBytes #-}
    primShiftToBytes _ = primShiftToBytes (Proxy :: Proxy Word16)
    {-# INLINE primShiftToBytes #-}
    primBaUIndex ba (Offset i) = Zq (primBaUIndex ba (Offset i))
    {-# INLINE primBaUIndex #-}
    primMbaURead mba (Offset i) = Zq <$> primMbaURead mba (Offset i)
    {-# INLINE primMbaURead #-}
    primMbaUWrite mba (Offset i) (Zq a) = primMbaUWrite mba (Offset i) a
    {-# INLINE primMbaUWrite #-}
    primAddrIndex addr (Offset i) = Zq (primAddrIndex addr (Offset i))
    {-# INLINE primAddrIndex #-}
    primAddrRead addr (Offset i) = Zq <$> primAddrRead addr (Offset i)
    {-# INLINE primAddrRead #-}
    primAddrWrite addr (Offset i) (Zq a) = primAddrWrite addr (Offset i) a
    {-# INLINE primAddrWrite #-}

instance Add Zq where
    zero = Zq 0
    Zq a .+ Zq b = Zq $ reduceSimple (a + b)
    Zq a .- Zq b = Zq $ reduceSimple (a + q16 - b)
    neg (Zq a) = Zq $ reduceSimple (q16 - a)

instance Mul Zq where
    one = Zq 1
    Zq a .* Zq b = Zq $ reduce (fromIntegral a * fromIntegral b)

#ifdef ML_KEM_TESTING
instance MulAdd Zq where
    mulAdd (Zq a) (Zq b) (Zq c) = Zq $ reduce $
        fromIntegral a * fromIntegral b + fromIntegral c

instance BiMul Zq Zq where
    (..*) = (.*)

instance BiMulAdd Zq Zq where
    biMulAdd = mulAdd

fromZq :: Zq -> Word16
fromZq (Zq a) = a
#endif

toZq :: Word16 -> Zq
toZq = Zq . reduce . fromIntegral

newtype Rq marking = Rq (BlockN marking N Zq)
#ifdef ML_KEM_TESTING
    deriving (Eq, Show)
#endif

instance Classified marking => Add (Rq marking) where
    zero = Rq zero
    Rq a .+ Rq b = Rq (a .+ b)
    Rq a .- Rq b = Rq (a .- b)
    neg (Rq a) = Rq (neg a)
    {-# SPECIALIZE instance Add (Rq Sec) #-}
    {-# SPECIALIZE instance Add (Rq Pub) #-}

infixl 6 ..+, ..-

-- Transformation called only at expected location in the LWE problem, after
-- adding noise to secret information.
(..+) :: Rq Sec -> Rq Sec -> Rq Pub
Rq a ..+ Rq b = Rq $ BlockN.zipWith (.+) a b
{-# NOINLINE (..+) #-}

(..-) :: Rq Pub -> Rq Sec -> Rq Sec
Rq a ..- Rq b = Rq $ BlockN.zipWith (.-) a b
{-# NOINLINE (..-) #-}

instance Leak Rq

#ifdef ML_KEM_TESTING
fromCoeffs :: [Zq] -> Maybe (Rq Sec)
fromCoeffs = fmap Rq . BlockN.fromList

toCoeffs :: Rq Sec -> [Zq]
toCoeffs (Rq a) = BlockN.toList a
#endif

newtype Tq marking = Tq (BlockN marking N Zq)
#ifdef ML_KEM_TESTING
    deriving (Eq, Show)
#endif

instance Classified marking => Add (Tq marking) where
    zero = Tq zero
    Tq a .+ Tq b = Tq (a .+ b)
    Tq a .- Tq b = Tq (a .- b)
    neg (Tq a) = Tq (neg a)
    {-# SPECIALIZE instance Add (Tq Sec) #-}
    {-# SPECIALIZE instance Add (Tq Pub) #-}

instance Leak Tq

instance NFData (Tq marking) where
    rnf (Tq a) = toNormalForm a

instance BiMul (Tq Pub) (Tq Sec) where
    (..*) = multiplyNTTs
    {-# NOINLINE (..*) #-}

instance BiMulAdd (Tq Pub) (Tq Sec) where
    biMulAdd = multiplyNTTsAdd
    {-# NOINLINE biMulAdd #-}

#ifdef ML_KEM_TESTING
instance Mul (Tq Sec) where
    one = Tq $ BlockN.create $ \(Offset i) -> if even i then one else zero
    (.*) = (..*) . leak

instance MulAdd (Tq Sec) where
    mulAdd = biMulAdd . leak
#endif

instance Crypto.ConstEqW (Tq Sec) where
    constEqW (Tq a) (Tq b) = Crypto.constEqW
        (BlockN.unsafeCast a :: SecureBlock Sec Word)
        (BlockN.unsafeCast b :: SecureBlock Sec Word)

instance Crypto.ConstEqW (Tq Pub) where
    constEqW (Tq a) (Tq b) = Crypto.constEqW
        (BlockN.unsafeCast a :: SecureBlock Pub Word)
        (BlockN.unsafeCast b :: SecureBlock Pub Word)

-- Computes the NTT representation of the given polynomial
ntt :: Classified marking => Rq marking -> Tq marking
ntt (Rq !a) = runST $ do
    b <- BlockN.thaw a
    outer b 1 128
    Tq <$> BlockN.unsafeFreeze b
  where
    outer !b !i len = when (len >= 2) $ inner b i len 0

    inner !b !i !len start
        | start < 256 = do
            let zeta = BlockN.index zetaPowBitRev i -- 17 ^ bitRev7 i
            loop b zeta (start + len) len start
            inner b (i + 1) len (start + offsetShiftL 1 len)
        | otherwise = outer b i (offsetShiftR 1 len)

    loop !b !zeta end len j =
        when (j < end) $ do
            t <- (zeta .*) <$> BlockN.read b (j + len)
            x <- BlockN.read b j
            BlockN.write b (j + len) (x .- t)
            BlockN.write b j (x .+ t)
            loop b zeta end len (j + 1)
{-# SPECIALIZE ntt :: Rq Sec -> Tq Sec #-}
{-# SPECIALIZE ntt :: Rq Pub -> Tq Pub #-}

-- Computes the polynomial that corresponds to the given NTT representation
nttInv :: Tq Sec -> Rq Sec
nttInv (Tq !a) = runST $ do
    b <- BlockN.thaw a
    outer b 127 2
    BlockN.iterModify (\x -> x .* Zq 3303) b
    Rq <$> BlockN.unsafeFreeze b
  where
    outer !b !i len = when (len <= 128) $ inner b i len 0

    inner !b !i !len start
        | start < 256 = do
            let zeta = BlockN.index zetaPowBitRev i -- 17 ^ bitRev7 i
            loop b zeta (start + len) len start
            inner b (i - 1) len (start + offsetShiftL 1 len)
        | otherwise = outer b i (offsetShiftL 1 len)

    loop !b !zeta end len j =
        when (j < end) $ do
            t <- BlockN.read b j
            x <- BlockN.read b (j + len)
            BlockN.write b j (t .+ x)
            BlockN.write b (j + len) (zeta .* (x .- t))
            loop b zeta end len (j + 1)

-- Computes the product of two NTT representations
multiplyNTTs :: Tq Pub -> Tq Sec -> Tq Sec
multiplyNTTs (Tq !f) (Tq !g) = runST $ do
    b <- BlockN.new (Proxy :: Proxy Sec)
    loop b 0
    Tq <$> BlockN.unsafeFreeze b
  where
    loop :: MutableBlockN Sec N Zq s -> Offset Zq -> ST s ()
    loop !b i = when (i < 128) $ do
        let ii = offsetShiftL 1 i
            a0 = BlockN.index f ii
            a1 = BlockN.index f (ii + 1)
            b0 = BlockN.index g ii
            b1 = BlockN.index g (ii + 1)
            (c0, c1) = baseCaseMultiply a0 a1 b0 b1 (BlockN.index gamma i)
        BlockN.write b ii c0
        BlockN.write b (ii + 1) c1
        loop b (i + 1)

-- Computes the product of two degree-one polynomials with respect to a quadratic modulus
baseCaseMultiply :: Zq -> Zq -> Zq -> Zq -> Zq -> (Zq, Zq)
baseCaseMultiply (Zq a0) (Zq a1) (Zq b0) (Zq b1) (Zq g) = (Zq c0, Zq c1)
  where
    x `mul` y = fromIntegral x * fromIntegral y
    b1g = reduce (b1 `mul` g)
    !c0 = reduce (a0 `mul` b0 + a1 `mul` b1g)
    !c1 = reduce (a0 `mul` b1 + a1 `mul` b0)

-- Multiply then add a third term
multiplyNTTsAdd :: Tq Pub -> Tq Sec -> Tq Sec -> Tq Sec
multiplyNTTsAdd (Tq !f) (Tq !g) (Tq !h) = runST $ do
    b <- BlockN.new (Proxy :: Proxy Sec)
    loop b 0
    Tq <$> BlockN.unsafeFreeze b
  where
    loop :: MutableBlockN Sec N Zq s -> Offset Zq -> ST s ()
    loop !b i = when (i < 128) $ do
        let ii = offsetShiftL 1 i
            a0 = BlockN.index f ii
            a1 = BlockN.index f (ii + 1)
            b0 = BlockN.index g ii
            b1 = BlockN.index g (ii + 1)
            c0 = BlockN.index h ii
            c1 = BlockN.index h (ii + 1)
            (d0, d1) = baseCaseMultiplyAdd a0 a1 b0 b1 c0 c1 (BlockN.index gamma i)
        BlockN.write b ii d0
        BlockN.write b (ii + 1) d1
        loop b (i + 1)

-- baseCaseMultiply then add a third term
baseCaseMultiplyAdd :: Zq -> Zq -> Zq -> Zq -> Zq -> Zq -> Zq -> (Zq, Zq)
baseCaseMultiplyAdd (Zq a0) (Zq a1) (Zq b0) (Zq b1) (Zq c0) (Zq c1) (Zq g) = (Zq d0, Zq d1)
  where
    x `mul` y = fromIntegral x * fromIntegral y
    b1g = reduce (b1 `mul` g)
    !d0 = reduce (fromIntegral c0 + a0 `mul` b0 + a1 `mul` b1g)
    !d1 = reduce (fromIntegral c1 + a0 `mul` b1 + a1 `mul` b0)

-- Values of 17 ^ BitRev7(𝑖) mod 𝑞 for 𝑖 ∈ {0, … , 127}
zetaPowBitRev :: BlockN Pub 128 Zq
zetaPowBitRev = runST $ do
    out <- BlockN.new (Proxy :: Proxy Pub)
    foldM_ (loop out) one offsets
    BlockN.unsafeFreeze out
  where
    offsets = Prelude.map (fromIntegral . bitRev7) [0 .. 127]
    loop b acc i = BlockN.write b i acc >> return (Zq 17 .* acc)

-- Values of 17 ^ 2.BitRev7(𝑖)+1 mod 𝑞 for 𝑖 ∈ {0, … , 127}
gamma :: BlockN Pub 128 Zq
gamma = BlockN.map (\z -> z .* z .* Zq 17) zetaPowBitRev

-- Compress a field element with 𝑑 < 12
compress :: Int -> Zq -> Word16
compress d (Zq x) = fromIntegral $
    ((fromIntegral x `unsafeShiftL` d + qHalf) * factor) `unsafeShiftR` 34
  where
    qHalf = (q64 + 1) `unsafeShiftR` 1
    factor = (1 `unsafeShiftL` 34) `div` q64
{-# INLINE compress #-}

-- Decompress a field element with 𝑑 < 12
decompress :: Int -> Word16 -> Zq
decompress d y = Zq $ fromIntegral (x2d `unsafeShiftR` d)
  where x2d = fromIntegral y * q32 + (1 `unsafeShiftL` (d - 1))
{-# INLINE decompress #-}

-- Compress a polynomial with 𝑑 < 12
rcompress :: Classified marking => Int -> Rq marking -> BlockN marking N Word16
rcompress !d (Rq a) = BlockN.map (compress d) a
{-# SPECIALIZE NOINLINE rcompress :: Int -> Rq Sec -> BlockN Sec N Word16 #-}
{-# SPECIALIZE NOINLINE rcompress :: Int -> Rq Pub -> BlockN Pub N Word16 #-}

-- Decompress a polynomial with 𝑑 < 12
rdecompress :: Classified marking => Int -> BlockN marking N Word16 -> Rq marking
rdecompress !d = Rq . BlockN.map (decompress d)
{-# SPECIALIZE NOINLINE rdecompress :: Int -> BlockN Sec N Word16 -> Rq Sec #-}
{-# SPECIALIZE NOINLINE rdecompress :: Int -> BlockN Pub N Word16 -> Rq Pub #-}

-- Generates a pseudorandom element of T𝑞 from a seed and two indices
sampleNTT :: SecureBytes Pub -> Word8 -> Word8 -> Tq Pub
sampleNTT seed !x !y = runST $ do
    b <- BlockN.new (Proxy :: Proxy Pub)
    runXof b (280 * 3) 0 0
    Tq <$> BlockN.unsafeFreeze b
  where
    runXof !b !xofLen !pos !j = case someNatVal (fromIntegral (8 * xofLen)) of
        SomeNat proxy -> do
            let bytes = Crypto.unBlockDigest (doHash proxy)
            loop b xofLen bytes pos j

    loop !b !xofLen !bytes !pos j
        | j == 256 = return ()
        | pos >= Offset xofLen = runXof b (xofLen + 56 * 3) pos j
        | otherwise = do
            let c0 = fromIntegral $ blockIndex bytes pos
                c1 = fromIntegral $ blockIndex bytes (pos + 1)
                c2 = fromIntegral $ blockIndex bytes (pos + 2)
                d1 = c0 + (c1 .&. 0xF) `unsafeShiftL` 8
                d2 = (c1 `unsafeShiftR` 4) + (c2 `unsafeShiftL` 4)
            j2 <- poke b j d1
            when (j2 < 256) $ poke b j2 d2 >>= loop b xofLen bytes (pos + 3)

    poke b j d
        | d < q16 = BlockN.write b j (Zq d) >> return (j + 1)
        | otherwise = return j

    doHash :: KnownNat bitlen => proxy bitlen -> BlockDigest (SHAKE128 bitlen)
    doHash _ = Crypto.hashToBlock input

    input :: SecureBytes Pub
    !input = B.unsafeCreate (len + 2) $ \d -> do
        B.copyByteArrayToPtr seed d
        pokeByteOff d len x
        pokeByteOff d (len + 1) y
    len = B.length seed

peekWord :: Ptr WordLE -> ST s WordM
peekWord p = fromLE <$> ST.peek p

peekWordPos :: Ptr WordLE -> BitPos -> ST s WordM
peekWordPos a bp = fromLE <$> ST.peekElemOff a (wordOff bp)

pokeWordPos :: Ptr WordLE -> BitPos -> WordM -> ST s ()
pokeWordPos a bp = ST.pokeElemOff a (wordOff bp) . toLE

newtype BitPos = BitPos Int

zeroPos :: BitPos
zeroPos = BitPos 0

wordOff :: BitPos -> Int
wordOff (BitPos p) = div p wordBits

bitPos :: BitPos -> Int
bitPos (BitPos p) = p .&. (wordBits - 1)

availPos :: Int -> BitPos -> Int
availPos requested (BitPos p) = min available requested
  where available = wordBits - (p .&. (wordBits - 1))

nextPos :: Int -> BitPos -> (Int, BitPos)
nextPos requested (BitPos p) = (howMany, BitPos $ p + howMany)
  where howMany = availPos requested (BitPos p)

getMask :: Int -> WordM
getMask howMany
    | howMany >= wordBits = maxBound
    | otherwise = (1 `unsafeShiftL` howMany) - 1
    -- branch useful only when processing one byte at a time due to
    -- architecture not supporting unaligned memory access

-- Takes a seed as input and outputs a pseudorandom sample from the
-- distribution D_eta
samplePolyCBD :: Word -> SecureBytes Sec -> Rq Sec
samplePolyCBD !eta !input = runST $ ST.withByteArray input $ \p -> do
    f <- BlockN.new (Proxy :: Proxy Sec)
    loop p f 0 zeroPos
    Rq <$> BlockN.unsafeFreeze f
  where
    loop :: Ptr WordLE -> MutableBlockN Sec N Zq s -> Offset Zq -> BitPos -> ST s ()
    loop !p !f !i !bp = when (i < Offset n) $ do
        (xs, bp') <- getBits p bp 0 (fromIntegral eta)
        (ys, bp'') <- getBits p bp' 0 (fromIntegral eta)
        BlockN.write f i (Zq xs .- Zq ys)
        loop p f (i + 1) bp''

    getBits :: Ptr WordLE -> BitPos -> Word16 -> Int -> ST s (Word16, BitPos)
    getBits !p !bp !acc !j
        | j == 0    = return (acc, bp)
        | otherwise = do
            x <- (`unsafeShiftR` bitPos bp) <$> peekWordPos p bp
            let (howMany, bp') = nextPos j bp
                bits = x .&. getMask howMany
            getBits p bp' (acc + fromIntegral (popCount bits)) (j - howMany)

-- Encodes an array of 𝑑-bit integers into a byte array for 1 ≤ 𝑑 ≤ 12
byteEncode :: Int -> BlockN marking N Word16 -> Builder marking
byteEncode d f = Builder.create (32 * d) (runByteEncode d f)
{-# INLINE byteEncode #-}

runByteEncode :: Int -> BlockN marking N Word16 -> Ptr WordLE -> ST s ()
runByteEncode !d !f dst = outer dst zeroPos 0 0
  where
    outer :: Ptr WordLE -> BitPos -> WordM -> Int -> ST s ()
    outer !b !bp !o pos = when (pos < n) $
        inner b pos bp o (BlockN.index f (Offset pos)) d
    {-# NOINLINE outer #-}

    inner :: Ptr WordLE -> Int -> BitPos -> WordM -> Word16 -> Int -> ST s ()
    inner !b !pos !bp !o !a j
        | j == 0 = outer b bp o (pos + 1)
        | bitPos bp + howMany < wordBits = inner b pos bp' o' a' j'
        | otherwise = pokeWordPos b bp o' >> inner b pos bp' 0 a' j'
      where
        (howMany, bp') = nextPos j bp
        x = fromIntegral a .&. getMask howMany
        o' = o .|. (x `unsafeShiftL` bitPos bp)
        a' = a `unsafeShiftR` howMany
        j' = j - howMany

-- Optimization of byteEncode when 𝑑=1
byteEncode1 :: BlockN Sec N Word16 -> Builder Sec
byteEncode1 !f = Builder.create 32 (runByteEncode1 f)
{-# INLINE byteEncode1 #-}

runByteEncode1 :: BlockN marking N Word16 -> Ptr WordLE -> ST s ()
runByteEncode1 !f dst = loop dst 0 0
  where
    loop :: Ptr WordLE -> WordM -> Int -> ST s ()
    loop !b !o pos
        | pos == n = return ()
        | bitPos bp + 1 < wordBits = loop b o' (pos + 1)
        | otherwise = pokeWordPos b bp o' >> loop b 0 (pos + 1)
      where
        bp = BitPos pos
        x = fromIntegral (a .&. 1)
        o' = o .|. (x `unsafeShiftL` bitPos bp)
        a = BlockN.index f (Offset pos)

-- byteEncode with 𝑑=12 after conversion from the field
byteEncode12 :: Tq marking -> Builder marking
byteEncode12 = byteEncode 12 . fromField
  where
    fromField :: Tq marking -> BlockN marking N Word16
    fromField (Tq f) = unsafeCoerce f
{-# INLINE byteEncode12 #-}

-- Decodes a byte array into an array of 𝑑-bit integers for 1 ≤ 𝑑 ≤ 12
byteDecode :: forall marking ba. (Classified marking, ByteArrayAccess ba) => Int -> ba -> BlockN marking N Word16
byteDecode !d !b = runST $
    ST.withByteArray b $ \p -> do
        f <- BlockN.new (Proxy :: Proxy marking)
        outer f p zeroPos 0
        BlockN.unsafeFreeze f
  where
    outer :: MutableBlockN marking N Word16 s -> Ptr WordLE -> BitPos -> Offset Word16 -> ST s ()
    outer !f !p !bp i = when (i < Offset n) $ inner f p i bp 0 0

    inner :: MutableBlockN marking N Word16 s -> Ptr WordLE -> Offset Word16 -> BitPos -> Word16 -> Int -> ST s ()
    inner !f !p !i !bp !v j
        | j == d = BlockN.write f i v >> outer f p bp (i + 1)
        | otherwise = do
            let (howMany, bp') = nextPos (d - j) bp
            y <- get p bp howMany
            let v' = v .|. (fromIntegral y `unsafeShiftL` j)
                j' = j + howMany
            inner f p i bp' v' j'

    get :: Ptr WordLE -> BitPos -> Int -> ST s WordM
    get p bp howMany = do
        x <- (`unsafeShiftR` bitPos bp) <$> peekWordPos p bp
        return (x .&. getMask howMany)
{-# SPECIALIZE byteDecode :: forall ba. ByteArrayAccess ba => Int -> View ba -> BlockN Sec N Word16 #-}
{-# SPECIALIZE byteDecode :: forall ba. ByteArrayAccess ba => Int -> View ba -> BlockN Pub N Word16 #-}
{-# SPECIALIZE byteDecode :: Int -> View Bytes -> BlockN Sec N Word16 #-}
{-# SPECIALIZE byteDecode :: Int -> View Bytes -> BlockN Pub N Word16 #-}

-- Optimization of byteDecode when 𝑑=1
byteDecode1 :: ByteArrayAccess ba => ba -> BlockN Sec N Word16
byteDecode1 !b = runST $
    ST.withByteArray b $ \p -> do
        f <- BlockN.new (Proxy :: Proxy Sec)
        outer f p 0
        BlockN.unsafeFreeze f
  where
    outer :: MutableBlockN Sec N Word16 s -> Ptr WordLE -> Int -> ST s ()
    outer !f !p i = when (i < n) $ do
        x <- peekWord p
        inner f (p `plusPtr` wordBytes) x i 0

    inner :: MutableBlockN Sec N Word16 s -> Ptr WordLE -> WordM -> Int -> Int -> ST s ()
    inner !f !p !acc !i j
        | j == wordBits = outer f p i
        | otherwise = do
            let v = fromIntegral (acc .&. 1)
            BlockN.write f (Offset i) v
            inner f p (acc `unsafeShiftR` 1) (i + 1) (j + 1)

-- byteDecode with 𝑑=12 and conversion to the field
byteDecode12 :: (Classified marking, ByteArrayAccess ba) => ba -> Tq marking
byteDecode12 = Tq . BlockN.map toZq . byteDecode 12
{-# SPECIALIZE byteDecode12 :: ByteArrayAccess ba => View ba -> Tq Sec #-}
{-# SPECIALIZE byteDecode12 :: ByteArrayAccess ba => View ba -> Tq Pub #-}
