-- | Optimisation pipelines.
module Futhark.Passes
  ( standardPipeline,
    sequentialPipeline,
    kernelsPipeline,
    sequentialCpuPipeline,
    gpuPipeline,
    mcPipeline,
    multicorePipeline,
  )
where

import Control.Category ((>>>))
import Futhark.IR.GPU (GPU)
import Futhark.IR.GPUMem (GPUMem)
import Futhark.IR.MC (MC)
import Futhark.IR.MCMem (MCMem)
import Futhark.IR.SOACS (SOACS, usesAD)
import Futhark.IR.Seq (Seq)
import Futhark.IR.SeqMem (SeqMem)
import Futhark.Optimise.ArrayShortCircuiting qualified as ArrayShortCircuiting
import Futhark.Optimise.CSE
import Futhark.Optimise.DoubleBuffer
import Futhark.Optimise.EntryPointMem
import Futhark.Optimise.Fusion
import Futhark.Optimise.GenRedOpt
import Futhark.Optimise.HistAccs
import Futhark.Optimise.InPlaceLowering
import Futhark.Optimise.InliningDeadFun
import Futhark.Optimise.MemoryBlockMerging qualified as MemoryBlockMerging
import Futhark.Optimise.MergeGPUBodies
import Futhark.Optimise.ReduceDeviceSyncs
import Futhark.Optimise.Sink
import Futhark.Optimise.TileLoops
import Futhark.Optimise.Unstream
import Futhark.Pass.AD
import Futhark.Pass.ExpandAllocations
import Futhark.Pass.ExplicitAllocations.GPU qualified as GPU
import Futhark.Pass.ExplicitAllocations.MC qualified as MC
import Futhark.Pass.ExplicitAllocations.Seq qualified as Seq
import Futhark.Pass.ExtractKernels
import Futhark.Pass.ExtractMulticore
import Futhark.Pass.FirstOrderTransform
import Futhark.Pass.KernelBabysitting
import Futhark.Pass.LiftAllocations as LiftAllocations
import Futhark.Pass.LowerAllocations as LowerAllocations
import Futhark.Pass.Simplify
import Futhark.Pipeline

-- | A pipeline used by all current compilers.  Performs inlining,
-- fusion, and various forms of cleanup.  This pipeline will be
-- followed by another one that deals with parallelism and memory.
standardPipeline :: Pipeline SOACS SOACS
standardPipeline :: Pipeline SOACS SOACS
standardPipeline =
  forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
    [ Pass SOACS SOACS
simplifySOACS,
      Pass SOACS SOACS
inlineConservatively,
      Pass SOACS SOACS
simplifySOACS,
      Pass SOACS SOACS
inlineAggressively,
      Pass SOACS SOACS
simplifySOACS,
      forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
True,
      Pass SOACS SOACS
simplifySOACS,
      Pass SOACS SOACS
fuseSOACs,
      forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
True,
      Pass SOACS SOACS
simplifySOACS,
      Pass SOACS SOACS
removeDeadFunctions
    ]
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall rep.
(Prog rep -> Bool) -> Pipeline rep rep -> Pipeline rep rep
condPipeline Prog SOACS -> Bool
usesAD Pipeline SOACS SOACS
adPipeline

-- | This is the pipeline that applies the AD transformation and
-- subsequent interesting optimisations.
adPipeline :: Pipeline SOACS SOACS
adPipeline :: Pipeline SOACS SOACS
adPipeline =
  forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
    [ Pass SOACS SOACS
applyAD,
      Pass SOACS SOACS
simplifySOACS,
      forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
True,
      Pass SOACS SOACS
fuseSOACs,
      forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
True,
      Pass SOACS SOACS
simplifySOACS
    ]

-- | The pipeline used by the CUDA and OpenCL backends, but before
-- adding memory information.  Includes 'standardPipeline'.
kernelsPipeline :: Pipeline SOACS GPU
kernelsPipeline :: Pipeline SOACS GPU
kernelsPipeline =
  Pipeline SOACS SOACS
standardPipeline
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall torep fromrep.
Checkable torep =>
Pass fromrep torep -> Pipeline fromrep torep
onePass Pass SOACS GPU
extractKernels
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
      [ Pass GPU GPU
simplifyGPU,
        Pass GPU GPU
optimiseGenRed,
        Pass GPU GPU
simplifyGPU,
        Pass GPU GPU
tileLoops,
        Pass GPU GPU
simplifyGPU,
        Pass GPU GPU
histAccsGPU,
        Pass GPU GPU
babysitKernels,
        Pass GPU GPU
simplifyGPU,
        Pass GPU GPU
unstreamGPU,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
True,
        Pass GPU GPU
simplifyGPU,
        Pass GPU GPU
sinkGPU, -- Sink reads before migrating them.
        Pass GPU GPU
reduceDeviceSyncs,
        Pass GPU GPU
simplifyGPU, -- Simplify and hoist storages.
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
True, -- Eliminate duplicate storages.
        Pass GPU GPU
mergeGPUBodies,
        Pass GPU GPU
simplifyGPU, -- Cleanup merged GPUBody kernels.
        Pass GPU GPU
sinkGPU, -- Sink reads within GPUBody kernels.
        Pass GPU GPU
inPlaceLoweringGPU
      ]

-- | The pipeline used by the sequential backends.  Turns all
-- parallelism into sequential loops.  Includes 'standardPipeline'.
sequentialPipeline :: Pipeline SOACS Seq
sequentialPipeline :: Pipeline SOACS Seq
sequentialPipeline =
  Pipeline SOACS SOACS
standardPipeline
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall torep fromrep.
Checkable torep =>
Pass fromrep torep -> Pipeline fromrep torep
onePass forall rep. FirstOrderRep rep => Pass SOACS rep
firstOrderTransform
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
      [ Pass Seq Seq
simplifySeq,
        Pass Seq Seq
inPlaceLoweringSeq
      ]

-- | Run 'sequentialPipeline', then add memory information (and
-- optimise it slightly).
sequentialCpuPipeline :: Pipeline SOACS SeqMem
sequentialCpuPipeline :: Pipeline SOACS SeqMem
sequentialCpuPipeline =
  Pipeline SOACS Seq
sequentialPipeline
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall torep fromrep.
Checkable torep =>
Pass fromrep torep -> Pipeline fromrep torep
onePass Pass Seq SeqMem
Seq.explicitAllocations
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
      [ forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass SeqMem SeqMem
simplifySeqMem,
        Pass SeqMem SeqMem
entryPointMemSeq,
        Pass SeqMem SeqMem
simplifySeqMem,
        Pass SeqMem SeqMem
LiftAllocations.liftAllocationsSeqMem,
        Pass SeqMem SeqMem
simplifySeqMem,
        Pass SeqMem SeqMem
ArrayShortCircuiting.optimiseSeqMem,
        Pass SeqMem SeqMem
simplifySeqMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass SeqMem SeqMem
simplifySeqMem,
        Pass SeqMem SeqMem
LowerAllocations.lowerAllocationsSeqMem,
        Pass SeqMem SeqMem
simplifySeqMem
      ]

-- | Run 'kernelsPipeline', then add memory information (and optimise
-- it a lot).
gpuPipeline :: Pipeline SOACS GPUMem
gpuPipeline :: Pipeline SOACS GPUMem
gpuPipeline =
  Pipeline SOACS GPU
kernelsPipeline
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall torep fromrep.
Checkable torep =>
Pass fromrep torep -> Pipeline fromrep torep
onePass Pass GPU GPUMem
GPU.explicitAllocations
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
      [ Pass GPUMem GPUMem
simplifyGPUMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass GPUMem GPUMem
simplifyGPUMem,
        Pass GPUMem GPUMem
entryPointMemGPU,
        Pass GPUMem GPUMem
doubleBufferGPU,
        Pass GPUMem GPUMem
simplifyGPUMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass GPUMem GPUMem
LiftAllocations.liftAllocationsGPUMem,
        Pass GPUMem GPUMem
simplifyGPUMem,
        Pass GPUMem GPUMem
ArrayShortCircuiting.optimiseGPUMem,
        Pass GPUMem GPUMem
simplifyGPUMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass GPUMem GPUMem
simplifyGPUMem,
        Pass GPUMem GPUMem
LowerAllocations.lowerAllocationsGPUMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass GPUMem GPUMem
simplifyGPUMem,
        Pass GPUMem GPUMem
MemoryBlockMerging.optimise,
        Pass GPUMem GPUMem
simplifyGPUMem,
        Pass GPUMem GPUMem
expandAllocations,
        Pass GPUMem GPUMem
simplifyGPUMem
      ]

-- | Run 'standardPipeline' and then convert to multicore
-- representation (and do a bunch of optimisation).
mcPipeline :: Pipeline SOACS MC
mcPipeline :: Pipeline SOACS MC
mcPipeline =
  Pipeline SOACS SOACS
standardPipeline
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall torep fromrep.
Checkable torep =>
Pass fromrep torep -> Pipeline fromrep torep
onePass Pass SOACS MC
extractMulticore
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
      [ Pass MC MC
simplifyMC,
        Pass MC MC
unstreamMC,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
True,
        Pass MC MC
simplifyMC,
        Pass MC MC
sinkMC,
        Pass MC MC
inPlaceLoweringMC
      ]

-- | Run 'mcPipeline' and then add memory information.
multicorePipeline :: Pipeline SOACS MCMem
multicorePipeline :: Pipeline SOACS MCMem
multicorePipeline =
  Pipeline SOACS MC
mcPipeline
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall torep fromrep.
Checkable torep =>
Pass fromrep torep -> Pipeline fromrep torep
onePass Pass MC MCMem
MC.explicitAllocations
    forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> forall rep. Checkable rep => [Pass rep rep] -> Pipeline rep rep
passes
      [ Pass MCMem MCMem
simplifyMCMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass MCMem MCMem
simplifyMCMem,
        Pass MCMem MCMem
entryPointMemMC,
        Pass MCMem MCMem
doubleBufferMC,
        Pass MCMem MCMem
simplifyMCMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass MCMem MCMem
LiftAllocations.liftAllocationsMCMem,
        Pass MCMem MCMem
simplifyMCMem,
        Pass MCMem MCMem
ArrayShortCircuiting.optimiseMCMem,
        Pass MCMem MCMem
simplifyMCMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass MCMem MCMem
simplifyMCMem,
        Pass MCMem MCMem
LowerAllocations.lowerAllocationsMCMem,
        forall rep.
(AliasableRep rep, CSEInOp (Op (Aliases rep))) =>
Bool -> Pass rep rep
performCSE Bool
False,
        Pass MCMem MCMem
simplifyMCMem
      ]