module Data.Array.Accelerate.LLVM.PTX.Debug (
module Data.Array.Accelerate.Debug,
module Data.Array.Accelerate.LLVM.PTX.Debug,
) where
import Data.Array.Accelerate.Debug hiding ( timed, elapsed )
import Foreign.CUDA.Driver.Stream ( Stream )
import qualified Foreign.CUDA.Driver.Event as Event
import Control.Monad.Trans
import Control.Concurrent
import Data.Time.Clock
import System.CPUTime
import Text.Printf
import GHC.Float
timed
:: Flag
-> (Double -> Double -> Double -> String)
-> Maybe Stream
-> IO ()
-> IO ()
{-# INLINE timed #-}
timed f msg =
monitorProcTime (getFlag f) (\t1 t2 t3 -> traceIO f (msg t1 t2 t3))
monitorProcTime
:: MonadIO m
=> IO Bool
-> (Double -> Double -> Double -> IO ())
-> Maybe Stream
-> m a
-> m a
{-# INLINE monitorProcTime #-}
monitorProcTime enabled display stream action = do
yes <- if debuggingIsEnabled then liftIO enabled else return False
if yes
then do
gpuBegin <- liftIO $ Event.create []
gpuEnd <- liftIO $ Event.create []
wallBegin <- liftIO $ getCurrentTime
cpuBegin <- liftIO $ getCPUTime
_ <- liftIO $ Event.record gpuBegin stream
result <- action
_ <- liftIO $ Event.record gpuEnd stream
cpuEnd <- liftIO $ getCPUTime
wallEnd <- liftIO $ getCurrentTime
_ <- liftIO . forkIO $ do
Event.block gpuEnd
diff <- Event.elapsedTime gpuBegin gpuEnd
let gpuTime = float2Double $ diff * 1E-3
cpuTime = fromIntegral (cpuEnd - cpuBegin) * 1E-12
wallTime = realToFrac (diffUTCTime wallEnd wallBegin)
Event.destroy gpuBegin
Event.destroy gpuEnd
display wallTime cpuTime gpuTime
return result
else
action
{-# INLINE elapsed #-}
elapsed :: Double -> Double -> Double -> String
elapsed wallTime cpuTime gpuTime =
printf "%s (wall), %s (cpu), %s (gpu)"
(showFFloatSIBase (Just 3) 1000 wallTime "s")
(showFFloatSIBase (Just 3) 1000 cpuTime "s")
(showFFloatSIBase (Just 3) 1000 gpuTime "s")