{-# LANGUAGE CPP       #-}
{-# LANGUAGE MagicHash #-}

{-|
Module      : Data.Bits.Pdep.Prim
Description : Primop wrappers for the Parallel Deposit operation
Copyright   : (c) John Ky, 2018-2019
License     : BSD-3-Clause
Maintainer  : newhoggy@gmail.com
Stability   : stable
-}
module Data.Bits.Pdep.Prim
  ( primPdep
  , primPdep8
  , primPdep16
  , primPdep32
  , primPdep64
  , fastPdepEnabled
  ) where

import GHC.Word

#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
import GHC.Prim
#else
import Data.Bits.Pdep.Slow
#endif

-- | Bitwise parallel deposit for 'Word'.
--
-- Takes the low-order bits of @src@ and deposits them, in order, at the
-- positions of the 1-bits of @mask@ (lowest set bit first).  Every position
-- where @mask@ has a 0-bit is cleared in the result.
--
-- Implemented by widening both arguments to 'Word64', delegating to
-- 'primPdep64' and converting the result back to 'Word'.
--
-- >>> primPdep 1 1
-- 1
--
-- >>> primPdep 5 26
-- 18
primPdep
  :: Word -- ^ word containing the bits that will be deposited
  -> Word -- ^ bitmap selecting the bits that are to be deposited
  -> Word -- ^ word containing the deposited bits
primPdep src mask = fromIntegral (primPdep64 (fromIntegral src) (fromIntegral mask))
{-# INLINE primPdep #-}

-- | Bitwise parallel deposit for 'Word64'.
--
-- Takes the low-order bits of @src@ and deposits them, in order, at the
-- positions of the 1-bits of @mask@ (lowest set bit first).  Every position
-- where @mask@ has a 0-bit is cleared in the result.
--
-- When the module is built with @base >= 4.11@ and @BMI2_ENABLED@ defined,
-- this wraps the @pdep64#@ primop; otherwise it falls back to 'slowPdep'
-- from "Data.Bits.Pdep.Slow".
--
-- >>> primPdep64 1 1
-- 1
--
-- >>> primPdep64 5 26
-- 18
primPdep64
  :: Word64 -- ^ word containing the bits that will be deposited
  -> Word64 -- ^ bitmap selecting the bits that are to be deposited
  -> Word64 -- ^ word containing the deposited bits
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
-- Fast path: unbox both arguments and hand them to the primop directly.
primPdep64 (W64# src#) (W64# mask#) = W64# (pdep64# src# mask#)
#else
-- Fallback path: software implementation.
primPdep64 = slowPdep
#endif
{-# INLINE primPdep64 #-}

-- | Bitwise parallel deposit for 'Word32'.
--
-- Takes the low-order bits of @src@ and deposits them, in order, at the
-- positions of the 1-bits of @mask@ (lowest set bit first).  Every position
-- where @mask@ has a 0-bit is cleared in the result.
--
-- When the module is built with @base >= 4.11@ and @BMI2_ENABLED@ defined,
-- this wraps the @pdep32#@ primop; otherwise it falls back to 'slowPdep'
-- from "Data.Bits.Pdep.Slow".
--
-- >>> primPdep32 1 1
-- 1
--
-- >>> primPdep32 5 26
-- 18
primPdep32
  :: Word32 -- ^ word containing the bits that will be deposited
  -> Word32 -- ^ bitmap selecting the bits that are to be deposited
  -> Word32 -- ^ word containing the deposited bits
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
#if MIN_VERSION_base(4,16,0)
-- From base-4.16 (GHC 9.2) 'W32#' wraps a @Word32#@ while @pdep32#@ still
-- operates on @Word#@, so convert explicitly on the way in and out.
primPdep32 (W32# src#) (W32# mask#) =
  W32# (wordToWord32# (pdep32# (word32ToWord# src#) (word32ToWord# mask#)))
#else
-- Older base: 'W32#' wraps a plain @Word#@, which is what @pdep32#@ expects.
primPdep32 (W32# src#) (W32# mask#) = W32# (pdep32# src# mask#)
#endif
#else
-- Fallback path: software implementation.
primPdep32 = slowPdep
#endif
{-# INLINE primPdep32 #-}

-- | Bitwise parallel deposit for 'Word16'.
--
-- Takes the low-order bits of @src@ and deposits them, in order, at the
-- positions of the 1-bits of @mask@ (lowest set bit first).  Every position
-- where @mask@ has a 0-bit is cleared in the result.
--
-- Implemented by widening both arguments to 'Word32', delegating to
-- 'primPdep32' and converting the result back to 'Word16'.  The widened mask
-- has no bits set above bit 15, so the conversion back discards nothing.
--
-- >>> primPdep16 1 1
-- 1
--
-- >>> primPdep16 5 26
-- 18
primPdep16
  :: Word16 -- ^ word containing the bits that will be deposited
  -> Word16 -- ^ bitmap selecting the bits that are to be deposited
  -> Word16 -- ^ word containing the deposited bits
primPdep16 src mask = fromIntegral (primPdep32 (fromIntegral src) (fromIntegral mask))
{-# INLINE primPdep16 #-}

-- | Bitwise parallel deposit for 'Word8'.
--
-- Takes the low-order bits of @src@ and deposits them, in order, at the
-- positions of the 1-bits of @mask@ (lowest set bit first).  Every position
-- where @mask@ has a 0-bit is cleared in the result.
--
-- Implemented by widening both arguments to 'Word32', delegating to
-- 'primPdep32' and converting the result back to 'Word8'.  The widened mask
-- has no bits set above bit 7, so the conversion back discards nothing.
--
-- >>> primPdep8 1 1
-- 1
--
-- >>> primPdep8 5 26
-- 18
primPdep8
  :: Word8 -- ^ word containing the bits that will be deposited
  -> Word8 -- ^ bitmap selecting the bits that are to be deposited
  -> Word8 -- ^ word containing the deposited bits
primPdep8 src mask = fromIntegral (primPdep32 (fromIntegral src) (fromIntegral mask))
{-# INLINE primPdep8 #-}

-- | Flag indicating whether the @pdep@ functions in this module use the
-- high-performance BMI2 instruction set.  A value of 'False' indicates that
-- @pdep@ is emulated in software.
--
-- The value is selected by the preprocessor when this module is compiled:
-- it is 'True' only when built against @base >= 4.11@ with @BMI2_ENABLED@
-- defined, and 'False' otherwise.
fastPdepEnabled :: Bool
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
fastPdepEnabled = True
#else
fastPdepEnabled = False
#endif
{-# INLINE fastPdepEnabled #-}