{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ForeignFunctionInterface #-} {-# LANGUAGE MagicHash #-} module Data.HashTable.Internal.CacheLine ( cacheLineSearch , cacheLineSearch2 , cacheLineSearch3 , forwardSearch2 , forwardSearch3 , isCacheLineAligned , advanceByCacheLineSize , prefetchRead , prefetchWrite , bl_abs# , sign# , mask# , maskw# ) where import Control.Monad import Control.Monad.ST (ST) import Data.HashTable.Internal.IntArray (Elem, IntArray) import qualified Data.HashTable.Internal.IntArray as M #ifndef NO_C_SEARCH import Foreign.C.Types #else import Data.Bits import Data.Int import qualified Data.Vector.Unboxed as U import GHC.Int #endif import Data.HashTable.Internal.Utils import GHC.Exts #if __GLASGOW_HASKELL__ >= 707 import GHC.Exts (isTrue#) #else isTrue# :: Bool -> Bool isTrue# = id #endif {-# INLINE prefetchRead #-} {-# INLINE prefetchWrite #-} prefetchRead :: IntArray s -> Int -> ST s () prefetchWrite :: IntArray s -> Int -> ST s () #ifndef NO_C_SEARCH foreign import ccall unsafe "line_search" c_lineSearch :: Ptr a -> CInt -> CUShort -> IO CInt foreign import ccall unsafe "line_search_2" c_lineSearch_2 :: Ptr a -> CInt -> CUShort -> CUShort -> IO CInt foreign import ccall unsafe "line_search_2" c_lineSearch_3 :: Ptr a -> CInt -> CUShort -> CUShort -> CUShort -> IO CInt foreign import ccall unsafe "forward_search_2" c_forwardSearch_2 :: Ptr a -> CInt -> CInt -> CUShort -> CUShort -> IO CInt foreign import ccall unsafe "forward_search_3" c_forwardSearch_3 :: Ptr a -> CInt -> CInt -> CUShort -> CUShort -> CUShort -> IO CInt foreign import ccall unsafe "prefetch_cacheline_read" prefetchCacheLine_read :: Ptr a -> CInt -> IO () foreign import ccall unsafe "prefetch_cacheline_write" prefetchCacheLine_write :: Ptr a -> CInt -> IO () fI :: (Num b, Integral a) => a -> b fI = fromIntegral prefetchRead a i = unsafeIOToST $ prefetchCacheLine_read v x where v = M.toPtr a x = fI i prefetchWrite a i = unsafeIOToST $ prefetchCacheLine_write v x where v = M.toPtr a x = fI i {-# INLINE forwardSearch2 #-} forwardSearch2 :: IntArray s -> Int -> Int -> Elem -> Elem -> ST s Int forwardSearch2 !vec !start !end !x1 !x2 = liftM fromEnum $! unsafeIOToST c where c = c_forwardSearch_2 (M.toPtr vec) (fI start) (fI end) (fI x1) (fI x2) {-# INLINE forwardSearch3 #-} forwardSearch3 :: IntArray s -> Int -> Int -> Elem -> Elem -> Elem -> ST s Int forwardSearch3 !vec !start !end !x1 !x2 !x3 = liftM fromEnum $! unsafeIOToST c where c = c_forwardSearch_3 (M.toPtr vec) (fI start) (fI end) (fI x1) (fI x2) (fI x3) {-# INLINE lineSearch #-} lineSearch :: IntArray s -> Int -> Elem -> ST s Int lineSearch !vec !start !value = liftM fromEnum $! unsafeIOToST c where c = c_lineSearch (M.toPtr vec) (fI start) (fI value) {-# INLINE lineSearch2 #-} lineSearch2 :: IntArray s -> Int -> Elem -> Elem -> ST s Int lineSearch2 !vec !start !x1 !x2 = liftM fromEnum $! unsafeIOToST c where c = c_lineSearch_2 (M.toPtr vec) (fI start) (fI x1) (fI x2) {-# INLINE lineSearch3 #-} lineSearch3 :: IntArray s -> Int -> Elem -> Elem -> Elem -> ST s Int lineSearch3 !vec !start !x1 !x2 !x3 = liftM fromEnum $! unsafeIOToST c where c = c_lineSearch_3 (M.toPtr vec) (fI start) (fI x1) (fI x2) (fI x3) #endif {-# INLINE advanceByCacheLineSize #-} advanceByCacheLineSize :: Int -> Int -> Int advanceByCacheLineSize !(I# start0#) !(I# vecSize#) = out where !(I# clm#) = cacheLineIntMask !clmm# = not# (int2Word# clm#) !start# = word2Int# (clmm# `and#` int2Word# start0#) !(I# nw#) = numElemsInCacheLine !start'# = start# +# nw# !s# = sign# (vecSize# -# start'# -# 1#) !m# = not# (int2Word# s#) !r# = int2Word# start'# `and#` m# !out = I# (word2Int# r#) {-# INLINE isCacheLineAligned #-} isCacheLineAligned :: Int -> Bool isCacheLineAligned (I# x#) = isTrue# (r# ==# 0#) where !(I# m#) = cacheLineIntMask !mw# = int2Word# m# !w# = int2Word# x# !r# = word2Int# (mw# `and#` w#) {-# INLINE sign# #-} -- | Returns 0 if x is positive, -1 otherwise sign# :: Int# -> Int# sign# !x# = x# `uncheckedIShiftRA#` wordSizeMinus1# where !(I# wordSizeMinus1#) = wordSize-1 {-# INLINE bl_abs# #-} -- | Abs of an integer, branchless bl_abs# :: Int# -> Int# bl_abs# !x# = word2Int# r# where !m# = sign# x# !r# = (int2Word# (m# +# x#)) `xor#` int2Word# m# {-# INLINE mask# #-} -- | Returns 0xfff..fff (aka -1) if a# == b#, 0 otherwise. mask# :: Int# -> Int# -> Int# mask# !a# !b# = dest# where !d# = a# -# b# !r# = bl_abs# d# -# 1# !dest# = sign# r# {- note: this code should be: mask# :: Int# -> Int# -> Int# mask# !a# !b# = let !(I# z#) = fromEnum (a# ==# b#) !q# = negateInt# z# in q# but GHC doesn't properly optimize this as straight-line code at the moment. -} {-# INLINE maskw# #-} maskw# :: Int# -> Int# -> Word# maskw# !a# !b# = int2Word# (mask# a# b#) #ifdef NO_C_SEARCH prefetchRead _ _ = return () prefetchWrite _ _ = return () {-# INLINE forwardSearch2 #-} forwardSearch2 :: IntArray s -> Int -> Int -> Elem -> Elem -> ST s Int forwardSearch2 !vec !start !end !x1 !x2 = go start end False where go !i !e !b = if i >= e then if b then return (-1) else go 0 start True else do h <- M.readArray vec i if h == x1 || h == x2 then return i else go (i+1) e b {-# INLINE forwardSearch3 #-} forwardSearch3 :: IntArray s -> Int -> Int -> Elem -> Elem -> Elem -> ST s Int forwardSearch3 !vec !start !end !x1 !x2 !x3 = go start end False where go !i !e !b = if i >= e then if b then return (-1) else go 0 start True else do h <- M.readArray vec i if h == x1 || h == x2 || h == x3 then return i else go (i+1) e b deBruijnBitPositions :: U.Vector Int8 deBruijnBitPositions = U.fromList [ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 ] {-# INLINE firstBitSet# #-} -- only works with 32-bit values -- ok for us here firstBitSet# :: Int# -> Int# firstBitSet# i# = word2Int# ((or# zeroCase# posw#)) where !zeroCase# = int2Word# (mask# 0# i#) !w# = int2Word# i# !iLowest# = word2Int# (and# w# (int2Word# (negateInt# i#))) !idxW# = uncheckedShiftRL# (narrow32Word# (timesWord# (int2Word# iLowest#) (int2Word# 0x077CB531#))) 27# !idx = I# (word2Int# idxW#) !(I8# pos8#) = U.unsafeIndex deBruijnBitPositions idx !posw# = int2Word# pos8# #endif #ifdef NO_C_SEARCH lineResult# :: Word# -- ^ mask -> Int -- ^ start value -> Int lineResult# bitmask# (I# start#) = I# (word2Int# rv#) where !p# = firstBitSet# (word2Int# bitmask#) !mm# = maskw# p# (-1#) !nmm# = not# mm# !rv# = mm# `or#` (nmm# `and#` (int2Word# (start# +# p#))) {-# INLINE lineResult# #-} -- Required: unlike in C search, required that the start index is -- cache-line-aligned and array has at least 32 elements after the start index lineSearch :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Elem -- ^ value to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found lineSearch !vec !start !elem1 = do let !(I# v1#) = fromIntegral elem1 !(I# x1#) <- liftM fromIntegral $ M.readArray vec start let !p1# = (and# (maskw# x1# v1#) (int2Word# 0x1#)) !(I# x2#) <- liftM fromIntegral $ M.readArray vec $! start + 1 let !p2# = or# p1# (and# (maskw# x2# v1#) (int2Word# 0x2#)) !(I# x3#) <- liftM fromIntegral $ M.readArray vec $! start + 2 let !p3# = or# p2# (and# (maskw# x3# v1#) (int2Word# 0x4#)) !(I# x4#) <- liftM fromIntegral $ M.readArray vec $! start + 3 let !p4# = or# p3# (and# (maskw# x4# v1#) (int2Word# 0x8#)) !(I# x5#) <- liftM fromIntegral $ M.readArray vec $! start + 4 let !p5# = or# p4# (and# (maskw# x5# v1#) (int2Word# 0x10#)) !(I# x6#) <- liftM fromIntegral $ M.readArray vec $! start + 5 let !p6# = or# p5# (and# (maskw# x6# v1#) (int2Word# 0x20#)) !(I# x7#) <- liftM fromIntegral $ M.readArray vec $! start + 6 let !p7# = or# p6# (and# (maskw# x7# v1#) (int2Word# 0x40#)) !(I# x8#) <- liftM fromIntegral $ M.readArray vec $! start + 7 let !p8# = or# p7# (and# (maskw# x8# v1#) (int2Word# 0x80#)) !(I# x9#) <- liftM fromIntegral $ M.readArray vec $! start + 8 let !p9# = or# p8# (and# (maskw# x9# v1#) (int2Word# 0x100#)) !(I# x10#) <- liftM fromIntegral $ M.readArray vec $! start + 9 let !p10# = or# p9# (and# (maskw# x10# v1#) (int2Word# 0x200#)) !(I# x11#) <- liftM fromIntegral $ M.readArray vec $! start + 10 let !p11# = or# p10# (and# (maskw# x11# v1#) (int2Word# 0x400#)) !(I# x12#) <- liftM fromIntegral $ M.readArray vec $! start + 11 let !p12# = or# p11# (and# (maskw# x12# v1#) (int2Word# 0x800#)) !(I# x13#) <- liftM fromIntegral $ M.readArray vec $! start + 12 let !p13# = or# p12# (and# (maskw# x13# v1#) (int2Word# 0x1000#)) !(I# x14#) <- liftM fromIntegral $ M.readArray vec $! start + 13 let !p14# = or# p13# (and# (maskw# x14# v1#) (int2Word# 0x2000#)) !(I# x15#) <- liftM fromIntegral $ M.readArray vec $! start + 14 let !p15# = or# p14# (and# (maskw# x15# v1#) (int2Word# 0x4000#)) !(I# x16#) <- liftM fromIntegral $ M.readArray vec $! start + 15 let !p16# = or# p15# (and# (maskw# x16# v1#) (int2Word# 0x8000#)) !(I# x17#) <- liftM fromIntegral $ M.readArray vec $! start + 16 let !p17# = or# p16# (and# (maskw# x17# v1#) (int2Word# 0x10000#)) !(I# x18#) <- liftM fromIntegral $ M.readArray vec $! start + 17 let !p18# = or# p17# (and# (maskw# x18# v1#) (int2Word# 0x20000#)) !(I# x19#) <- liftM fromIntegral $ M.readArray vec $! start + 18 let !p19# = or# p18# (and# (maskw# x19# v1#) (int2Word# 0x40000#)) !(I# x20#) <- liftM fromIntegral $ M.readArray vec $! start + 19 let !p20# = or# p19# (and# (maskw# x20# v1#) (int2Word# 0x80000#)) !(I# x21#) <- liftM fromIntegral $ M.readArray vec $! start + 20 let !p21# = or# p20# (and# (maskw# x21# v1#) (int2Word# 0x100000#)) !(I# x22#) <- liftM fromIntegral $ M.readArray vec $! start + 21 let !p22# = or# p21# (and# (maskw# x22# v1#) (int2Word# 0x200000#)) !(I# x23#) <- liftM fromIntegral $ M.readArray vec $! start + 22 let !p23# = or# p22# (and# (maskw# x23# v1#) (int2Word# 0x400000#)) !(I# x24#) <- liftM fromIntegral $ M.readArray vec $! start + 23 let !p24# = or# p23# (and# (maskw# x24# v1#) (int2Word# 0x800000#)) !(I# x25#) <- liftM fromIntegral $ M.readArray vec $! start + 24 let !p25# = or# p24# (and# (maskw# x25# v1#) (int2Word# 0x1000000#)) !(I# x26#) <- liftM fromIntegral $ M.readArray vec $! start + 25 let !p26# = or# p25# (and# (maskw# x26# v1#) (int2Word# 0x2000000#)) !(I# x27#) <- liftM fromIntegral $ M.readArray vec $! start + 26 let !p27# = or# p26# (and# (maskw# x27# v1#) (int2Word# 0x4000000#)) !(I# x28#) <- liftM fromIntegral $ M.readArray vec $! start + 27 let !p28# = or# p27# (and# (maskw# x28# v1#) (int2Word# 0x8000000#)) !(I# x29#) <- liftM fromIntegral $ M.readArray vec $! start + 28 let !p29# = or# p28# (and# (maskw# x29# v1#) (int2Word# 0x10000000#)) !(I# x30#) <- liftM fromIntegral $ M.readArray vec $! start + 29 let !p30# = or# p29# (and# (maskw# x30# v1#) (int2Word# 0x20000000#)) !(I# x31#) <- liftM fromIntegral $ M.readArray vec $! start + 30 let !p31# = or# p30# (and# (maskw# x31# v1#) (int2Word# 0x40000000#)) !(I# x32#) <- liftM fromIntegral $ M.readArray vec $! start + 31 let !p32# = or# p31# (and# (maskw# x32# v1#) (int2Word# 0x80000000#)) return $! lineResult# p32# start -- Required: unlike in C search, required that the start index is -- cache-line-aligned and array has at least 32 elements after the start index lineSearch2 :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Elem -- ^ value to search for -> Elem -- ^ second value to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found lineSearch2 !vec !start !elem1 !elem2 = do let !(I# v1#) = fromIntegral elem1 let !(I# v2#) = fromIntegral elem2 !(I# x1#) <- liftM fromIntegral $ M.readArray vec start let !p1# = (and# (int2Word# 0x1#) (or# (maskw# x1# v1#) (maskw# x1# v2#))) !(I# x2#) <- liftM fromIntegral $ M.readArray vec $! start + 1 let !p2# = or# p1# (and# (int2Word# 0x2#) (or# (maskw# x2# v1#) (maskw# x2# v2#))) !(I# x3#) <- liftM fromIntegral $ M.readArray vec $! start + 2 let !p3# = or# p2# (and# (int2Word# 0x4#) (or# (maskw# x3# v1#) (maskw# x3# v2#))) !(I# x4#) <- liftM fromIntegral $ M.readArray vec $! start + 3 let !p4# = or# p3# (and# (int2Word# 0x8#) (or# (maskw# x4# v1#) (maskw# x4# v2#))) !(I# x5#) <- liftM fromIntegral $ M.readArray vec $! start + 4 let !p5# = or# p4# (and# (int2Word# 0x10#) (or# (maskw# x5# v1#) (maskw# x5# v2#))) !(I# x6#) <- liftM fromIntegral $ M.readArray vec $! start + 5 let !p6# = or# p5# (and# (int2Word# 0x20#) (or# (maskw# x6# v1#) (maskw# x6# v2#))) !(I# x7#) <- liftM fromIntegral $ M.readArray vec $! start + 6 let !p7# = or# p6# (and# (int2Word# 0x40#) (or# (maskw# x7# v1#) (maskw# x7# v2#))) !(I# x8#) <- liftM fromIntegral $ M.readArray vec $! start + 7 let !p8# = or# p7# (and# (int2Word# 0x80#) (or# (maskw# x8# v1#) (maskw# x8# v2#))) !(I# x9#) <- liftM fromIntegral $ M.readArray vec $! start + 8 let !p9# = or# p8# (and# (int2Word# 0x100#) (or# (maskw# x9# v1#) (maskw# x9# v2#))) !(I# x10#) <- liftM fromIntegral $ M.readArray vec $! start + 9 let !p10# = or# p9# (and# (int2Word# 0x200#) (or# (maskw# x10# v1#) (maskw# x10# v2#))) !(I# x11#) <- liftM fromIntegral $ M.readArray vec $! start + 10 let !p11# = or# p10# (and# (int2Word# 0x400#) (or# (maskw# x11# v1#) (maskw# x11# v2#))) !(I# x12#) <- liftM fromIntegral $ M.readArray vec $! start + 11 let !p12# = or# p11# (and# (int2Word# 0x800#) (or# (maskw# x12# v1#) (maskw# x12# v2#))) !(I# x13#) <- liftM fromIntegral $ M.readArray vec $! start + 12 let !p13# = or# p12# (and# (int2Word# 0x1000#) (or# (maskw# x13# v1#) (maskw# x13# v2#))) !(I# x14#) <- liftM fromIntegral $ M.readArray vec $! start + 13 let !p14# = or# p13# (and# (int2Word# 0x2000#) (or# (maskw# x14# v1#) (maskw# x14# v2#))) !(I# x15#) <- liftM fromIntegral $ M.readArray vec $! start + 14 let !p15# = or# p14# (and# (int2Word# 0x4000#) (or# (maskw# x15# v1#) (maskw# x15# v2#))) !(I# x16#) <- liftM fromIntegral $ M.readArray vec $! start + 15 let !p16# = or# p15# (and# (int2Word# 0x8000#) (or# (maskw# x16# v1#) (maskw# x16# v2#))) !(I# x17#) <- liftM fromIntegral $ M.readArray vec $! start + 16 let !p17# = or# p16# (and# (int2Word# 0x10000#) (or# (maskw# x17# v1#) (maskw# x17# v2#))) !(I# x18#) <- liftM fromIntegral $ M.readArray vec $! start + 17 let !p18# = or# p17# (and# (int2Word# 0x20000#) (or# (maskw# x18# v1#) (maskw# x18# v2#))) !(I# x19#) <- liftM fromIntegral $ M.readArray vec $! start + 18 let !p19# = or# p18# (and# (int2Word# 0x40000#) (or# (maskw# x19# v1#) (maskw# x19# v2#))) !(I# x20#) <- liftM fromIntegral $ M.readArray vec $! start + 19 let !p20# = or# p19# (and# (int2Word# 0x80000#) (or# (maskw# x20# v1#) (maskw# x20# v2#))) !(I# x21#) <- liftM fromIntegral $ M.readArray vec $! start + 20 let !p21# = or# p20# (and# (int2Word# 0x100000#) (or# (maskw# x21# v1#) (maskw# x21# v2#))) !(I# x22#) <- liftM fromIntegral $ M.readArray vec $! start + 21 let !p22# = or# p21# (and# (int2Word# 0x200000#) (or# (maskw# x22# v1#) (maskw# x22# v2#))) !(I# x23#) <- liftM fromIntegral $ M.readArray vec $! start + 22 let !p23# = or# p22# (and# (int2Word# 0x400000#) (or# (maskw# x23# v1#) (maskw# x23# v2#))) !(I# x24#) <- liftM fromIntegral $ M.readArray vec $! start + 23 let !p24# = or# p23# (and# (int2Word# 0x800000#) (or# (maskw# x24# v1#) (maskw# x24# v2#))) !(I# x25#) <- liftM fromIntegral $ M.readArray vec $! start + 24 let !p25# = or# p24# (and# (int2Word# 0x1000000#) (or# (maskw# x25# v1#) (maskw# x25# v2#))) !(I# x26#) <- liftM fromIntegral $ M.readArray vec $! start + 25 let !p26# = or# p25# (and# (int2Word# 0x2000000#) (or# (maskw# x26# v1#) (maskw# x26# v2#))) !(I# x27#) <- liftM fromIntegral $ M.readArray vec $! start + 26 let !p27# = or# p26# (and# (int2Word# 0x4000000#) (or# (maskw# x27# v1#) (maskw# x27# v2#))) !(I# x28#) <- liftM fromIntegral $ M.readArray vec $! start + 27 let !p28# = or# p27# (and# (int2Word# 0x8000000#) (or# (maskw# x28# v1#) (maskw# x28# v2#))) !(I# x29#) <- liftM fromIntegral $ M.readArray vec $! start + 28 let !p29# = or# p28# (and# (int2Word# 0x10000000#) (or# (maskw# x29# v1#) (maskw# x29# v2#))) !(I# x30#) <- liftM fromIntegral $ M.readArray vec $! start + 29 let !p30# = or# p29# (and# (int2Word# 0x20000000#) (or# (maskw# x30# v1#) (maskw# x30# v2#))) !(I# x31#) <- liftM fromIntegral $ M.readArray vec $! start + 30 let !p31# = or# p30# (and# (int2Word# 0x40000000#) (or# (maskw# x31# v1#) (maskw# x31# v2#))) !(I# x32#) <- liftM fromIntegral $ M.readArray vec $! start + 31 let !p32# = or# p31# (and# (int2Word# 0x80000000#) (or# (maskw# x32# v1#) (maskw# x32# v2#))) return $! lineResult# p32# start -- Required: unlike in C search, required that the start index is -- cache-line-aligned and array has at least 32 elements after the start index lineSearch3 :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Elem -- ^ value to search for -> Elem -- ^ second value to search for -> Elem -- ^ third value to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found lineSearch3 !vec !start !elem1 !elem2 !elem3 = do let !(I# v1#) = fromIntegral elem1 let !(I# v2#) = fromIntegral elem2 let !(I# v3#) = fromIntegral elem3 !(I# x1#) <- liftM fromIntegral $ M.readArray vec start let !p1# = (and# (int2Word# 0x1#) (or# (maskw# x1# v1#) (or# (maskw# x1# v2#) (maskw# x1# v3#)))) !(I# x2#) <- liftM fromIntegral $ M.readArray vec $! start + 1 let !p2# = or# p1# (and# (int2Word# 0x2#) (or# (maskw# x2# v1#) (or# (maskw# x2# v2#) (maskw# x2# v3#)))) !(I# x3#) <- liftM fromIntegral $ M.readArray vec $! start + 2 let !p3# = or# p2# (and# (int2Word# 0x4#) (or# (maskw# x3# v1#) (or# (maskw# x3# v2#) (maskw# x3# v3#)))) !(I# x4#) <- liftM fromIntegral $ M.readArray vec $! start + 3 let !p4# = or# p3# (and# (int2Word# 0x8#) (or# (maskw# x4# v1#) (or# (maskw# x4# v2#) (maskw# x4# v3#)))) !(I# x5#) <- liftM fromIntegral $ M.readArray vec $! start + 4 let !p5# = or# p4# (and# (int2Word# 0x10#) (or# (maskw# x5# v1#) (or# (maskw# x5# v2#) (maskw# x5# v3#)))) !(I# x6#) <- liftM fromIntegral $ M.readArray vec $! start + 5 let !p6# = or# p5# (and# (int2Word# 0x20#) (or# (maskw# x6# v1#) (or# (maskw# x6# v2#) (maskw# x6# v3#)))) !(I# x7#) <- liftM fromIntegral $ M.readArray vec $! start + 6 let !p7# = or# p6# (and# (int2Word# 0x40#) (or# (maskw# x7# v1#) (or# (maskw# x7# v2#) (maskw# x7# v3#)))) !(I# x8#) <- liftM fromIntegral $ M.readArray vec $! start + 7 let !p8# = or# p7# (and# (int2Word# 0x80#) (or# (maskw# x8# v1#) (or# (maskw# x8# v2#) (maskw# x8# v3#)))) !(I# x9#) <- liftM fromIntegral $ M.readArray vec $! start + 8 let !p9# = or# p8# (and# (int2Word# 0x100#) (or# (maskw# x9# v1#) (or# (maskw# x9# v2#) (maskw# x9# v3#)))) !(I# x10#) <- liftM fromIntegral $ M.readArray vec $! start + 9 let !p10# = or# p9# (and# (int2Word# 0x200#) (or# (maskw# x10# v1#) (or# (maskw# x10# v2#) (maskw# x10# v3#)))) !(I# x11#) <- liftM fromIntegral $ M.readArray vec $! start + 10 let !p11# = or# p10# (and# (int2Word# 0x400#) (or# (maskw# x11# v1#) (or# (maskw# x11# v2#) (maskw# x11# v3#)))) !(I# x12#) <- liftM fromIntegral $ M.readArray vec $! start + 11 let !p12# = or# p11# (and# (int2Word# 0x800#) (or# (maskw# x12# v1#) (or# (maskw# x12# v2#) (maskw# x12# v3#)))) !(I# x13#) <- liftM fromIntegral $ M.readArray vec $! start + 12 let !p13# = or# p12# (and# (int2Word# 0x1000#) (or# (maskw# x13# v1#) (or# (maskw# x13# v2#) (maskw# x13# v3#)))) !(I# x14#) <- liftM fromIntegral $ M.readArray vec $! start + 13 let !p14# = or# p13# (and# (int2Word# 0x2000#) (or# (maskw# x14# v1#) (or# (maskw# x14# v2#) (maskw# x14# v3#)))) !(I# x15#) <- liftM fromIntegral $ M.readArray vec $! start + 14 let !p15# = or# p14# (and# (int2Word# 0x4000#) (or# (maskw# x15# v1#) (or# (maskw# x15# v2#) (maskw# x15# v3#)))) !(I# x16#) <- liftM fromIntegral $ M.readArray vec $! start + 15 let !p16# = or# p15# (and# (int2Word# 0x8000#) (or# (maskw# x16# v1#) (or# (maskw# x16# v2#) (maskw# x16# v3#)))) !(I# x17#) <- liftM fromIntegral $ M.readArray vec $! start + 16 let !p17# = or# p16# (and# (int2Word# 0x10000#) (or# (maskw# x17# v1#) (or# (maskw# x17# v2#) (maskw# x17# v3#)))) !(I# x18#) <- liftM fromIntegral $ M.readArray vec $! start + 17 let !p18# = or# p17# (and# (int2Word# 0x20000#) (or# (maskw# x18# v1#) (or# (maskw# x18# v2#) (maskw# x18# v3#)))) !(I# x19#) <- liftM fromIntegral $ M.readArray vec $! start + 18 let !p19# = or# p18# (and# (int2Word# 0x40000#) (or# (maskw# x19# v1#) (or# (maskw# x19# v2#) (maskw# x19# v3#)))) !(I# x20#) <- liftM fromIntegral $ M.readArray vec $! start + 19 let !p20# = or# p19# (and# (int2Word# 0x80000#) (or# (maskw# x20# v1#) (or# (maskw# x20# v2#) (maskw# x20# v3#)))) !(I# x21#) <- liftM fromIntegral $ M.readArray vec $! start + 20 let !p21# = or# p20# (and# (int2Word# 0x100000#) (or# (maskw# x21# v1#) (or# (maskw# x21# v2#) (maskw# x21# v3#)))) !(I# x22#) <- liftM fromIntegral $ M.readArray vec $! start + 21 let !p22# = or# p21# (and# (int2Word# 0x200000#) (or# (maskw# x22# v1#) (or# (maskw# x22# v2#) (maskw# x22# v3#)))) !(I# x23#) <- liftM fromIntegral $ M.readArray vec $! start + 22 let !p23# = or# p22# (and# (int2Word# 0x400000#) (or# (maskw# x23# v1#) (or# (maskw# x23# v2#) (maskw# x23# v3#)))) !(I# x24#) <- liftM fromIntegral $ M.readArray vec $! start + 23 let !p24# = or# p23# (and# (int2Word# 0x800000#) (or# (maskw# x24# v1#) (or# (maskw# x24# v2#) (maskw# x24# v3#)))) !(I# x25#) <- liftM fromIntegral $ M.readArray vec $! start + 24 let !p25# = or# p24# (and# (int2Word# 0x1000000#) (or# (maskw# x25# v1#) (or# (maskw# x25# v2#) (maskw# x25# v3#)))) !(I# x26#) <- liftM fromIntegral $ M.readArray vec $! start + 25 let !p26# = or# p25# (and# (int2Word# 0x2000000#) (or# (maskw# x26# v1#) (or# (maskw# x26# v2#) (maskw# x26# v3#)))) !(I# x27#) <- liftM fromIntegral $ M.readArray vec $! start + 26 let !p27# = or# p26# (and# (int2Word# 0x4000000#) (or# (maskw# x27# v1#) (or# (maskw# x27# v2#) (maskw# x27# v3#)))) !(I# x28#) <- liftM fromIntegral $ M.readArray vec $! start + 27 let !p28# = or# p27# (and# (int2Word# 0x8000000#) (or# (maskw# x28# v1#) (or# (maskw# x28# v2#) (maskw# x28# v3#)))) !(I# x29#) <- liftM fromIntegral $ M.readArray vec $! start + 28 let !p29# = or# p28# (and# (int2Word# 0x10000000#) (or# (maskw# x29# v1#) (or# (maskw# x29# v2#) (maskw# x29# v3#)))) !(I# x30#) <- liftM fromIntegral $ M.readArray vec $! start + 29 let !p30# = or# p29# (and# (int2Word# 0x20000000#) (or# (maskw# x30# v1#) (or# (maskw# x30# v2#) (maskw# x30# v3#)))) !(I# x31#) <- liftM fromIntegral $ M.readArray vec $! start + 30 let !p31# = or# p30# (and# (int2Word# 0x40000000#) (or# (maskw# x31# v1#) (or# (maskw# x31# v2#) (maskw# x31# v3#)))) !(I# x32#) <- liftM fromIntegral $ M.readArray vec $! start + 31 let !p32# = or# p31# (and# (int2Word# 0x80000000#) (or# (maskw# x32# v1#) (or# (maskw# x32# v2#) (maskw# x32# v3#)))) return $! lineResult# p32# start ------------------------------------------------------------------------------ -- | Search through a mutable vector for a given int value. The number of -- things to search for must be at most the number of things remaining in the -- vector. naiveSearch :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Int -- ^ number of things to search -> Elem -- ^ value to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found naiveSearch !vec !start !nThings !value = go start where !doneIdx = start + nThings go !i | i >= doneIdx = return (-1) | otherwise = do x <- M.readArray vec i if x == value then return i else go (i+1) {-# INLINE naiveSearch #-} ------------------------------------------------------------------------------ naiveSearch2 :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Int -- ^ number of things to search -> Elem -- ^ value to search for -> Elem -- ^ value 2 to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found naiveSearch2 !vec !start !nThings !value1 !value2 = go start where !doneIdx = start + nThings go !i | i >= doneIdx = return (-1) | otherwise = do x <- M.readArray vec i if x == value1 || x == value2 then return i else go (i+1) {-# INLINE naiveSearch2 #-} ------------------------------------------------------------------------------ naiveSearch3 :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Int -- ^ number of things to search -> Elem -- ^ value to search for -> Elem -- ^ value 2 to search for -> Elem -- ^ value 3 to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found naiveSearch3 !vec !start !nThings !value1 !value2 !value3 = go start where !doneIdx = start + nThings go !i | i >= doneIdx = return (-1) | otherwise = do x <- M.readArray vec i if x == value1 || x == value2 || x == value3 then return i else go (i+1) {-# INLINE naiveSearch3 #-} -- end #if NO_C_SEARCH #endif ------------------------------------------------------------------------------ -- | Search through a mutable vector for a given int value, cache-line aligned. -- If the start index is cache-line aligned, and there is more than a -- cache-line's room between the start index and the end of the vector, we will -- search the cache-line all at once using an efficient branchless -- bit-twiddling technique. Otherwise, we will use a typical loop. -- cacheLineSearch :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Elem -- ^ value to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found cacheLineSearch !vec !start !value = do #ifdef NO_C_SEARCH let !vlen = M.length vec let !st1 = vlen - start let !nvlen = numElemsInCacheLine - st1 let adv = (start + cacheLineIntMask) .&. complement cacheLineIntMask let st2 = adv - start if nvlen > 0 || not (isCacheLineAligned start) then naiveSearch vec start (min st1 st2) value else lineSearch vec start value #else lineSearch vec start value #endif {-# INLINE cacheLineSearch #-} ------------------------------------------------------------------------------ -- | Search through a mutable vector for one of two given int values, -- cache-line aligned. If the start index is cache-line aligned, and there is -- more than a cache-line's room between the start index and the end of the -- vector, we will search the cache-line all at once using an efficient -- branchless bit-twiddling technique. Otherwise, we will use a typical loop. -- cacheLineSearch2 :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Elem -- ^ value to search for -> Elem -- ^ value 2 to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found cacheLineSearch2 !vec !start !value !value2 = do #ifdef NO_C_SEARCH let !vlen = M.length vec let !st1 = vlen - start let !nvlen = numElemsInCacheLine - st1 let adv = (start + cacheLineIntMask) .&. complement cacheLineIntMask let st2 = adv - start if nvlen > 0 || not (isCacheLineAligned start) then naiveSearch2 vec start (min st1 st2) value value2 else lineSearch2 vec start value value2 #else lineSearch2 vec start value value2 #endif {-# INLINE cacheLineSearch2 #-} ------------------------------------------------------------------------------ -- | Search through a mutable vector for one of three given int values, -- cache-line aligned. If the start index is cache-line aligned, and there is -- more than a cache-line's room between the start index and the end of the -- vector, we will search the cache-line all at once using an efficient -- branchless bit-twiddling technique. Otherwise, we will use a typical loop. -- cacheLineSearch3 :: IntArray s -- ^ vector to search -> Int -- ^ start index -> Elem -- ^ value to search for -> Elem -- ^ value 2 to search for -> Elem -- ^ value 3 to search for -> ST s Int -- ^ dest index where it can be found, or -- \"-1\" if not found cacheLineSearch3 !vec !start !value !value2 !value3 = do #ifdef NO_C_SEARCH let !vlen = M.length vec let !st1 = vlen - start let !nvlen = numElemsInCacheLine - st1 let adv = (start + cacheLineIntMask) .&. complement cacheLineIntMask let st2 = adv - start if nvlen > 0 || not (isCacheLineAligned start) then naiveSearch3 vec start (min st1 st2) value value2 value3 else lineSearch3 vec start value value2 value3 #else lineSearch3 vec start value value2 value3 #endif {-# INLINE cacheLineSearch3 #-}