{-# LANGUAGE ForeignFunctionInterface #-}
-- |
-- Module      : Data.Array.Accelerate.LLVM.PTX.Debug
-- Copyright   : [2014..2020] The Accelerate Team
-- License     : BSD3
--
-- Maintainer  : Trevor L. McDonell <trevor.mcdonell@gmail.com>
-- Stability   : experimental
-- Portability : non-portable (GHC extensions)
--

module Data.Array.Accelerate.LLVM.PTX.Debug (

  module Data.Array.Accelerate.Debug,
  module Data.Array.Accelerate.LLVM.PTX.Debug,

) where

import Data.Array.Accelerate.Debug                      hiding ( timed, elapsed )

import Foreign.CUDA.Driver.Stream                       ( Stream )
import qualified Foreign.CUDA.Driver.Event              as Event

import Control.Monad.Trans
import Control.Concurrent
import System.CPUTime
import Text.Printf

import GHC.Float


-- | Execute an action and time the results. The second argument specifies how
-- to format the output string given elapsed GPU and CPU time respectively
--
timed
    :: Flag
    -> (Double -> Double -> Double -> String)
    -> Maybe Stream
    -> IO ()
    -> IO ()
{-# INLINE timed #-}
timed :: Flag
-> (Double -> Double -> Double -> String)
-> Maybe Stream
-> IO ()
-> IO ()
timed Flag
f Double -> Double -> Double -> String
msg =
  IO Bool
-> (Double -> Double -> Double -> IO ())
-> Maybe Stream
-> IO ()
-> IO ()
forall (m :: * -> *) a.
MonadIO m =>
IO Bool
-> (Double -> Double -> Double -> IO ())
-> Maybe Stream
-> m a
-> m a
monitorProcTime (Flag -> IO Bool
getFlag Flag
f) (\Double
t1 Double
t2 Double
t3 -> Flag -> String -> IO ()
traceIO Flag
f (Double -> Double -> Double -> String
msg Double
t1 Double
t2 Double
t3))

monitorProcTime
    :: MonadIO m
    => IO Bool
    -> (Double -> Double -> Double -> IO ())
    -> Maybe Stream
    -> m a
    -> m a
{-# INLINE monitorProcTime #-}
monitorProcTime :: IO Bool
-> (Double -> Double -> Double -> IO ())
-> Maybe Stream
-> m a
-> m a
monitorProcTime IO Bool
enabled Double -> Double -> Double -> IO ()
display Maybe Stream
stream m a
action = do
  Bool
yes <- if Bool
debuggingIsEnabled then IO Bool -> m Bool
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO IO Bool
enabled else Bool -> m Bool
forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False
  if Bool
yes
    then do
      Event
gpuBegin  <- IO Event -> m Event
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Event -> m Event) -> IO Event -> m Event
forall a b. (a -> b) -> a -> b
$ [EventFlag] -> IO Event
Event.create []
      Event
gpuEnd    <- IO Event -> m Event
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Event -> m Event) -> IO Event -> m Event
forall a b. (a -> b) -> a -> b
$ [EventFlag] -> IO Event
Event.create []
      Double
wallBegin <- IO Double -> m Double
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Double -> m Double) -> IO Double -> m Double
forall a b. (a -> b) -> a -> b
$ IO Double
getMonotonicTime
      Integer
cpuBegin  <- IO Integer -> m Integer
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Integer -> m Integer) -> IO Integer -> m Integer
forall a b. (a -> b) -> a -> b
$ IO Integer
getCPUTime
      ()
_         <- IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> IO () -> m ()
forall a b. (a -> b) -> a -> b
$ Event -> Maybe Stream -> IO ()
Event.record Event
gpuBegin Maybe Stream
stream
      a
result    <- m a
action
      ()
_         <- IO () -> m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> m ()) -> IO () -> m ()
forall a b. (a -> b) -> a -> b
$ Event -> Maybe Stream -> IO ()
Event.record Event
gpuEnd Maybe Stream
stream
      Integer
cpuEnd    <- IO Integer -> m Integer
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Integer -> m Integer) -> IO Integer -> m Integer
forall a b. (a -> b) -> a -> b
$ IO Integer
getCPUTime
      Double
wallEnd   <- IO Double -> m Double
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO Double -> m Double) -> IO Double -> m Double
forall a b. (a -> b) -> a -> b
$ IO Double
getMonotonicTime

      -- Wait for the GPU to finish executing then display the timing execution
      -- message. Do this in a separate thread so that the remaining kernels can
      -- be queued asynchronously.
      --
      ThreadId
_         <- IO ThreadId -> m ThreadId
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO ThreadId -> m ThreadId)
-> (IO () -> IO ThreadId) -> IO () -> m ThreadId
forall b c a. (b -> c) -> (a -> b) -> a -> c
. IO () -> IO ThreadId
forkIO (IO () -> m ThreadId) -> IO () -> m ThreadId
forall a b. (a -> b) -> a -> b
$ do
        Event -> IO ()
Event.block Event
gpuEnd
        Float
diff    <- Event -> Event -> IO Float
Event.elapsedTime Event
gpuBegin Event
gpuEnd
        let gpuTime :: Double
gpuTime  = Float -> Double
float2Double (Float -> Double) -> Float -> Double
forall a b. (a -> b) -> a -> b
$ Float
diff Float -> Float -> Float
forall a. Num a => a -> a -> a
* Float
1E-3                   -- milliseconds
            cpuTime :: Double
cpuTime  = Integer -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer
cpuEnd Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
- Integer
cpuBegin) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
1E-12     -- picoseconds
            wallTime :: Double
wallTime = Double
wallEnd Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
wallBegin                          -- seconds

        Event -> IO ()
Event.destroy Event
gpuBegin
        Event -> IO ()
Event.destroy Event
gpuEnd
        --
        Double -> Double -> Double -> IO ()
display Double
wallTime Double
cpuTime Double
gpuTime
      --
      a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return a
result

    else
      m a
action


{-# INLINE elapsed #-}
elapsed :: Double -> Double -> Double -> String
elapsed :: Double -> Double -> Double -> String
elapsed Double
wallTime Double
cpuTime Double
gpuTime =
  String -> String -> String -> String -> String
forall r. PrintfType r => String -> r
printf String
"%s (wall), %s (cpu), %s (gpu)"
    (Maybe Int -> Double -> Double -> String -> String
forall a. RealFloat a => Maybe Int -> a -> a -> String -> String
showFFloatSIBase (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
3) Double
1000 Double
wallTime String
"s")
    (Maybe Int -> Double -> Double -> String -> String
forall a. RealFloat a => Maybe Int -> a -> a -> String -> String
showFFloatSIBase (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
3) Double
1000 Double
cpuTime String
"s")
    (Maybe Int -> Double -> Double -> String -> String
forall a. RealFloat a => Maybe Int -> a -> a -> String -> String
showFFloatSIBase (Int -> Maybe Int
forall a. a -> Maybe a
Just Int
3) Double
1000 Double
gpuTime String
"s")

-- accelerate/cbits/clock.c
foreign import ccall unsafe "clock_gettime_monotonic_seconds" getMonotonicTime :: IO Double