{-# LANGUAGE TemplateHaskell #-} module NoSlow.Micro.Kernels ( kernels ) where import qualified NoSlow.Backend.Interface as I import NoSlow.Util.Base kernels = [d| -- --------------------------- -- map, zipWith, replicate -- --------------------------- -- Add a constant to every element -- -- Doesn't say much on its own but is useful for evaluating the performance -- of splus splus1 :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a splus1 _ as = named "$a+1" $ I.map (+1) as -- Add a constant to every element (version 2) -- -- No intermediate array should be created, performance should be -- similar to splus1 splus1_r :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a splus1_r _ as = named "$a+^1" $ I.zipWith (+) as (I.replicate (I.length as) 1) -- Add a number to every element -- -- The x should only be inspected once outside the loop. Performance should -- be similar to splus1 splus :: (Num a, I.Vector v a) => Ty (v a) -> v a -> a -> v a splus _ as x = named "$a+x" $ I.map (+x) as -- Add a number to every element (version 2) -- -- No intermediate array should be created, performance should be -- similar to splus and splus1_r splus_r :: (Num a, I.Vector v a) => Ty (v a) -> v a -> a -> v a splus_r _ as x = named "$a+^x" $ I.zipWith (+) as (I.replicate (I.length as) x) -- Do these maps get fused? splus4 :: (Num a, I.Vector v a) => Ty (v a) -> v a -> a -> v a splus4 _ as x = named "$a+x+x+x+x" $ I.map (+x) $ I.map (+x) $ I.map (+x) $ I.map (+x) as -- Elementwise addition -- -- Lower bound on the execution time of the following benchmarks plus :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a -> v a plus _ as bs = named "$a+$b" $ I.zipWith (+) as bs -- Checks speed of map/zip compared to zipWith plus_zip :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a -> v a plus_zip _ as bs = named "$a+$b(zip)" $ I.map (\p -> I.fst p + I.snd p) (I.zip as bs) -- x should only be inspected once outside the loop axpy :: (Num a, I.Vector v a) => Ty (v a) -> a -> v a -> v a -> v a axpy _ x as bs = named "x*$a+$b" $ I.zipWith (+) (I.map (x*) as) bs -- Lots of zips can be inefficient with stream fusion. mpp :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a -> v a -> v a -> v a mpp _ as bs cs ds = named "($a+$b)*($c+$d)" $ I.zipWith (*) (I.zipWith (+) as bs) (I.zipWith (+) cs ds) -- How does this compare to mpp? mpp_zip :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a -> v a -> v a -> v a mpp_zip _ as bs cs ds = named "($a+$b)*($c+$d)(zip)" $ I.map (\p -> (I.fst (I.fst p) + I.snd (I.fst p)) * (I.fst (I.snd p) + I.snd (I.snd p))) (I.zip (I.zip as bs) (I.zip cs ds)) -- Both x and y should be inspected once. mspsp :: (Num a, I.Vector v a) => Ty (v a) -> a -> v a -> a -> v a -> v a mspsp _ x as y bs = named "(x+$a)*(y+$b)" $ I.zipWith (*) (I.map (x+) as) (I.map (y+) bs) -- Do we get rid of the replicates? mspsp_r :: (Num a, I.Vector v a) => Ty (v a) -> a -> v a -> a -> v a -> v a mspsp_r _ x as y bs = named "(^x+$a)*(^y+$b)" $ I.zipWith (*) (I.zipWith (+) (I.replicate (I.length as) x) as) (I.zipWith (+) (I.replicate (I.length bs) y) bs) -- ----------- -- Filters -- ----------- -- Removes all elements from the list, basic test of filter fusion filterout :: (Num a, Eq a, I.Vector v a) => Ty (v a) -> v a -> v a filterout _ as = named "filter(neq0)(map0)" $ I.filter (/=0) $ I.map (\x -> x-x) as -- Only do the test once, no loop should be executed filterout_r :: (Num a, Eq a, I.Vector v a) => Ty (v a) -> Len -> v a filterout_r _ (Len n) = named "filter(neq0)(^0)" $ I.filter (/=0) $ I.replicate n 0 -- Retain all elements in a list filterin :: (Num a, Eq a, I.Vector v a) => Ty (v a) -> v a -> v a filterin _ as = named "filter(eq0)(map0)" $ I.filter (==0) $ I.map (\x -> x-x) as -- Only do the test once, should be faster than filterin filterin_r :: (Num a, Eq a, I.Vector v a) => Ty (v a) -> Len -> v a filterin_r _ (Len n) = named "filter(eq0)(^0)" $ I.filter (==0) $ I.replicate n 0 -- Compute x outside of the loop zip_filter :: (Num a, Ord a, I.Vector v a) => Ty (v a) -> Len -> v a -> v a -> v a zip_filter _ (Len n) as bs = let x = fromIntegral (n `div` 2) in I.zipWith (+) (I.filter ( Ty (v a) -> Len -> v a -> v a -> v a filter_zip _ (Len n) as bs = I.filter (< fromIntegral n) (I.zipWith (+) as bs) filter_evens :: I.Vector v a => Ty (v a) -> Len -> v a -> v a filter_evens _ (Len n) as = I.map I.snd $ I.filter (even . I.fst) $ I.zip (I.enumFromTo_Int 0 (n-1)) as -- -------- -- Sums -- -------- -- Dot product dotp :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a -> a dotp _ as bs = named "sum($a*$b)" $ I.sum (I.zipWith (*) as bs) dotp_zip :: (Num a, I.Vector v a) => Ty (v a) -> v a -> v a -> a dotp_zip _ as bs = named "sum($a*$b)(zip)" $ I.sum (I.map (\p -> I.fst p * I.snd p) (I.zip as bs)) -- sum [1 .. n] sumn :: (Num a, I.Vector v a) => Ty (v a) -> Len -> a sumn ty (Len n) = named "sum[m..n]" $ I.sum $ I.map fromIntegral (I.enumFromTo_Int 1 n) `ofType` ty sumsq_map :: (Num a, I.Vector v a) => Ty (v a) -> Len -> a sumsq_map ty (Len n) = named "sumsq(map)" $ I.sum $ I.map (\x -> x*x) $ I.map fromIntegral (I.enumFromTo_Int 1 n) `ofType` ty sumsq_zip :: (Num a, I.Vector v a) => Ty (v a) -> Len -> a sumsq_zip ty (Len n) = named "sumsq(zip)" $ let as = I.map fromIntegral (I.enumFromTo_Int 1 n) `ofType` ty in I.sum $ I.zipWith (*) as as sum_evens :: (Num a, I.Vector v a) => Ty (v a) -> Len -> v a -> a sum_evens _ (Len n) as = I.sum $ I.map I.snd $ I.filter (even . I.fst) $ I.zip (I.enumFromTo_Int 0 (n-1)) as |]