{-# LANGUAGE ConstraintKinds #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE TypeFamilies #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ScopedTypeVariables #-} -- | The simplification engine is only willing to hoist allocations -- out of loops if the memory block resulting from the allocation is -- dead at the end of the loop. If it is not, we may cause data -- hazards. -- -- This module rewrites loops with memory block merge parameters such -- that each memory block is copied at the end of the iteration, thus -- ensuring that any allocation inside the loop is dead at the end of -- the loop. This is only possible for allocations whose size is -- loop-invariant, although the initial size may differ from the size -- produced by the loop result. -- -- Additionally, inside parallel kernels we also copy the initial -- value. This has the effect of making the memory block returned by -- the array non-existential, which is important for later memory -- expansion to work. module Futhark.Optimise.DoubleBuffer ( doubleBuffer ) where import Control.Monad.State import Control.Monad.Writer import Control.Monad.Reader import qualified Data.Map.Strict as M import Data.Maybe import Data.List (find) import Futhark.Construct import Futhark.Representation.AST import Futhark.Pass.ExplicitAllocations (arraySizeInBytesExp) import qualified Futhark.Representation.ExplicitMemory.IndexFunction as IxFun import Futhark.Representation.ExplicitMemory hiding (Prog, Body, Stm, Pattern, PatElem, BasicOp, Exp, Lambda, FunDef, FParam, LParam, RetType) import Futhark.Pass import Futhark.Util (maybeHead) doubleBuffer :: Pass ExplicitMemory ExplicitMemory doubleBuffer = Pass { passName = "Double buffer" , passDescription = "Perform double buffering for merge parameters of sequential loops." , passFunction = intraproceduralTransformation optimiseFunDef } optimiseFunDef :: FunDef ExplicitMemory -> PassM (FunDef ExplicitMemory) optimiseFunDef fundec = modifyNameSource $ \src -> let m = runDoubleBufferM $ inScopeOf fundec $ optimiseBody $ funDefBody fundec (body', src') = runState (runReaderT m env) src in (fundec { funDefBody = body' }, src') where env = Env mempty doNotTouchLoop doNotTouchLoop ctx val body = return (mempty, ctx, val, body) data Env = Env { envScope :: Scope ExplicitMemory , envOptimiseLoop :: OptimiseLoop } newtype DoubleBufferM a = DoubleBufferM { runDoubleBufferM :: ReaderT Env (State VNameSource) a } deriving (Functor, Applicative, Monad, MonadReader Env, MonadFreshNames) instance HasScope ExplicitMemory DoubleBufferM where askScope = asks envScope instance LocalScope ExplicitMemory DoubleBufferM where localScope scope = local $ \env -> env { envScope = envScope env <> scope } optimiseBody :: Body ExplicitMemory -> DoubleBufferM (Body ExplicitMemory) optimiseBody body = do bnds' <- optimiseStms $ stmsToList $ bodyStms body return $ body { bodyStms = stmsFromList bnds' } optimiseStms :: [Stm ExplicitMemory] -> DoubleBufferM [Stm ExplicitMemory] optimiseStms [] = return [] optimiseStms (e:es) = do e_es <- optimiseStm e es' <- localScope (castScope $ scopeOf e_es) $ optimiseStms es return $ e_es ++ es' optimiseStm :: Stm ExplicitMemory -> DoubleBufferM [Stm ExplicitMemory] optimiseStm (Let pat aux (DoLoop ctx val form body)) = do body' <- localScope (scopeOf form <> scopeOfFParams (map fst $ ctx++val)) $ optimiseBody body opt_loop <- asks envOptimiseLoop (bnds, ctx', val', body'') <- opt_loop ctx val body' return $ bnds ++ [Let pat aux $ DoLoop ctx' val' form body''] optimiseStm (Let pat aux e) = pure . Let pat aux <$> mapExpM optimise e where optimise = identityMapper { mapOnBody = \_ x -> optimiseBody x :: DoubleBufferM (Body ExplicitMemory) , mapOnOp = optimiseOp } optimiseOp :: Op ExplicitMemory -> DoubleBufferM (Op ExplicitMemory) optimiseOp (Inner (SegOp op)) = local inSegOp $ Inner . SegOp <$> mapSegOpM mapper op where mapper = identitySegOpMapper { mapOnSegOpLambda = optimiseLambda , mapOnSegOpBody = optimiseKernelBody } inSegOp env = env { envOptimiseLoop = optimiseLoop } optimiseOp op = return op optimiseKernelBody :: KernelBody ExplicitMemory -> DoubleBufferM (KernelBody ExplicitMemory) optimiseKernelBody kbody = do stms' <- optimiseStms $ stmsToList $ kernelBodyStms kbody return $ kbody { kernelBodyStms = stmsFromList stms' } optimiseLambda :: Lambda ExplicitMemory -> DoubleBufferM (Lambda ExplicitMemory) optimiseLambda lam = do body <- localScope (castScope $ scopeOf lam) $ optimiseBody $ lambdaBody lam return lam { lambdaBody = body } type OptimiseLoop = [(FParam ExplicitMemory, SubExp)] -> [(FParam ExplicitMemory, SubExp)] -> Body ExplicitMemory -> DoubleBufferM ([Stm ExplicitMemory], [(FParam ExplicitMemory, SubExp)], [(FParam ExplicitMemory, SubExp)], Body ExplicitMemory) optimiseLoop :: OptimiseLoop optimiseLoop ctx val body = do -- We start out by figuring out which of the merge variables should -- be double-buffered. buffered <- doubleBufferMergeParams (zip (map fst ctx) (bodyResult body)) (map fst merge) (boundInBody body) -- Then create the allocations of the buffers and copies of the -- initial values. (merge', allocs) <- allocStms merge buffered -- Modify the loop body to copy buffered result arrays. let body' = doubleBufferResult (map fst merge) buffered body (ctx', val') = splitAt (length ctx) merge' -- Modify the initial merge p return (allocs, ctx', val', body') where merge = ctx ++ val -- | The booleans indicate whether we should also play with the -- initial merge values. data DoubleBuffer = BufferAlloc VName (PrimExp VName) Space Bool | BufferCopy VName IxFun VName Bool -- ^ First name is the memory block to copy to, -- second is the name of the array copy. | NoBuffer deriving (Show) doubleBufferMergeParams :: MonadFreshNames m => [(FParam ExplicitMemory, SubExp)] -> [FParam ExplicitMemory] -> Names -> m [DoubleBuffer] doubleBufferMergeParams ctx_and_res val_params bound_in_loop = evalStateT (mapM buffer val_params) M.empty where loopVariant v = v `nameIn` bound_in_loop || v `elem` map (paramName . fst) ctx_and_res loopInvariantSize (Constant v) = Just (Constant v, True) loopInvariantSize (Var v) = case find ((==v) . paramName . fst) ctx_and_res of Just (_, Constant val) -> Just (Constant val, False) Just (_, Var v') | not $ loopVariant v' -> Just (Var v', False) Just _ -> Nothing Nothing -> Just (Var v, True) sizeForMem mem = maybeHead $ mapMaybe (arrayInMem . paramAttr) val_params where arrayInMem (MemArray pt shape _ (ArrayIn arraymem ixfun)) | IxFun.isDirect ixfun, Just (dims, b) <- mapAndUnzipM loopInvariantSize $ shapeDims shape, mem == arraymem = Just (arraySizeInBytesExp $ Array pt (Shape dims) NoUniqueness, or b) arrayInMem _ = Nothing buffer fparam = case paramType fparam of Mem space | Just (size, b) <- sizeForMem $ paramName fparam -> do -- Let us double buffer this! bufname <- lift $ newVName "double_buffer_mem" modify $ M.insert (paramName fparam) (bufname, b) return $ BufferAlloc bufname size space b Array {} | MemArray _ _ _ (ArrayIn mem ixfun) <- paramAttr fparam -> do buffered <- gets $ M.lookup mem case buffered of Just (bufname, b) -> do copyname <- lift $ newVName "double_buffer_array" return $ BufferCopy bufname ixfun copyname b Nothing -> return NoBuffer _ -> return NoBuffer allocStms :: [(FParam ExplicitMemory, SubExp)] -> [DoubleBuffer] -> DoubleBufferM ([(FParam ExplicitMemory, SubExp)], [Stm ExplicitMemory]) allocStms merge = runWriterT . zipWithM allocation merge where allocation m@(Param pname _, _) (BufferAlloc name size space b) = do stms <- lift $ runBinder_ $ do size' <- letSubExp "double_buffer_size" =<< toExp size letBindNames_ [name] $ Op $ Alloc size' space tell $ stmsToList stms if b then return (Param pname $ MemMem space, Var name) else return m allocation (f, Var v) (BufferCopy mem _ _ b) | b = do v_copy <- lift $ newVName $ baseString v ++ "_double_buffer_copy" (_v_mem, v_ixfun) <- lift $ lookupArraySummary v let bt = elemType $ paramType f shape = arrayShape $ paramType f bound = MemArray bt shape NoUniqueness $ ArrayIn mem v_ixfun tell [Let (Pattern [] [PatElem v_copy bound]) (defAux ()) $ BasicOp $ Copy v] -- It is important that we treat this as a consumption, to -- avoid the Copy from being hoisted out of any enclosing -- loops. Since we re-use (=overwrite) memory in the loop, -- the copy is critical for initialisation. See issue #816. let uniqueMemInfo (MemArray pt pshape _ ret) = MemArray pt pshape Unique ret uniqueMemInfo info = info return (uniqueMemInfo <$> f, Var v_copy) allocation (f, se) _ = return (f, se) doubleBufferResult :: [FParam ExplicitMemory] -> [DoubleBuffer] -> Body ExplicitMemory -> Body ExplicitMemory doubleBufferResult valparams buffered (Body () bnds res) = let (ctx_res, val_res) = splitAt (length res - length valparams) res (copybnds,val_res') = unzip $ zipWith3 buffer valparams buffered val_res in Body () (bnds<>stmsFromList (catMaybes copybnds)) $ ctx_res ++ val_res' where buffer _ (BufferAlloc bufname _ _ _) _ = (Nothing, Var bufname) buffer fparam (BufferCopy bufname ixfun copyname _) (Var v) = -- To construct the copy we will need to figure out its type -- based on the type of the function parameter. let t = resultType $ paramType fparam summary = MemArray (elemType t) (arrayShape t) NoUniqueness $ ArrayIn bufname ixfun copybnd = Let (Pattern [] [PatElem copyname summary]) (defAux ()) $ BasicOp $ Copy v in (Just copybnd, Var copyname) buffer _ _ se = (Nothing, se) parammap = M.fromList $ zip (map paramName valparams) res resultType t = t `setArrayDims` map substitute (arrayDims t) substitute (Var v) | Just replacement <- M.lookup v parammap = replacement substitute se = se