{-# LANGUAGE QuasiQuotes #-}

-- | Timing harness for 'fftCore': generates a program that runs the FFT core
-- 100 times on a zero-filled buffer and prints the CPU time consumed.
module FFT_bench where

import Prelude ()

import Feldspar.Run
import Feldspar.Data.Buffered
import Feldspar.Data.Vector

import FFT

-- | Size in bytes assumed for one @Complex Double@ element. Used by
-- 'benchmark' to compute the byte count handed to @memset@.
sizeOf_double_complex :: Data Length
sizeOf_double_complex = 16
  -- Checked on an x86_64 system
  -- TODO Feldspar should have a built-in `sizeof` function

-- | @2^n@, computed by shifting @1@ left by @n@ bits.
twoTo :: (Num a, Bits a, PrimType a) => Data Index -> Data a
twoTo n = 1 .<<. i2n n

-- | C helper added to the generated program: prints the CPU time, in seconds,
-- between two @clock_t@ samples.
printTime_def = [cedecl|
  void printTime(typename clock_t start, typename clock_t end)
  {
    printf("CPU time (sec): %f\n", (double)(end-start) / CLOCKS_PER_SEC);
  }
  |]

-- | Measure the time for 100 runs of 'fftCore' (excluding initialization) for
-- arrays of the given size
--
-- The argument @n@ is the number of elements in the store. It is also
-- multiplied by 'sizeOf_double_complex' to obtain the @memset@ byte count.
benchmark n = do
    -- Headers needed by the C code emitted below:
    --   <stdio.h>  for printf (used in printTime)
    --   <string.h> for memset
    --   <time.h>   for clock, clock_t and CLOCKS_PER_SEC
    addInclude "<stdio.h>"
    addInclude "<string.h>"
    addInclude "<time.h>"
    addDefinition printTime_def

    -- Objects that receive the two clock() samples
    start <- newObject "clock_t" False
    end   <- newObject "clock_t" False

    -- Allocate the working store and zero-fill it via memset so that the
    -- timed loop operates on initialized memory
    st :: Store (Data (Complex Double)) <- newStore n
    inp <- unsafeFreezeStore n st
    callProc "memset"
      [ iarrArg inp
      , valArg (0 :: Data Index)
      , valArg (n * sizeOf_double_complex)
      ]

    -- Base-2 logarithm of the input length, shared so it is computed once
    logN <- shareM (ilog2 (length inp))
    ts   <- manifestFresh $ Pull (twoTo (logN - 1)) (tw True (twoTo logN))
      -- Change `manifestFresh` to `return` to avoid pre-computing twiddle
      -- factors

    -- Timed section: only the 100 calls to fftCore lie between the two
    -- clock() samples
    callProcAssign start "clock" []
    for (0, 1, Excl 100) $ \(_ :: Data Index) ->
        void $ fftCore st 2 ts logN inp
    callProcAssign end "clock" []

    callProc "printTime" [objArg start, objArg end]

-- | Run 'benchmark' for the given size through @runCompiled'@, with compiler
-- assertions disabled and the external compiler given @-O3@ (before) and
-- @-lm@ (after).
runBenchmark n = runCompiled'
    def {compilerAssertions = select []}
      -- Note: important to turn off assertions when running the benchmarks
    def {externalFlagsPre = ["-O3"], externalFlagsPost = ["-lm"]}
    (benchmark n)