root/compiler/codeGen/CgTailCall.lhs

Revision 4384e146640230399b38cd62e8e5df391f72c3a7, 18.0 KB (checked in by David Terei <davidterei@…>, 5 months ago)

Track STG live register information for use in LLVM

We now carry around with CmmJump? statements a list of
the STG registers that are live at that jump site.
This is used by the LLVM backend so it can avoid
unnesecarily passing around dead registers, improving
perfromance. This gives us the framework to finally
fix trac #4308.

  • Property mode set to 100644
Line 
1%
2% (c) The University of Glasgow 2006
3% (c) The GRASP/AQUA Project, Glasgow University, 1992-1998
4%
5% Code generation for tail calls.
6
7\begin{code}
8{-# OPTIONS -fno-warn-tabs #-}
9-- The above warning supression flag is a temporary kludge.
10-- While working on this module you are encouraged to remove it and
11-- detab the module (please do the detabbing in a separate patch). See
12--     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#TabsvsSpaces
13-- for details
14
15module CgTailCall (
16        cgTailCall, performTailCall,
17        performReturn, performPrimReturn,
18        returnUnboxedTuple, ccallReturnUnboxedTuple,
19        pushUnboxedTuple,
20        tailCallPrimOp,
21        tailCallPrimCall,
22
23        pushReturnAddress
24    ) where
25
26#include "HsVersions.h"
27
28import CgMonad
29import CgBindery
30import CgInfoTbls
31import CgCallConv
32import CgStackery
33import CgHeapery
34import CgUtils
35import CgTicky
36import ClosureInfo
37import OldCmm   
38import OldCmmUtils
39import CLabel
40import Type
41import Id
42import StgSyn
43import PrimOp
44import Outputable
45import StaticFlags
46
47import Control.Monad
48import Data.Maybe
49
50-----------------------------------------------------------------------------
51-- Tail Calls
52
53cgTailCall :: Id -> [StgArg] -> Code
54
55-- Here's the code we generate for a tail call.  (NB there may be no
56-- arguments, in which case this boils down to just entering a variable.)
57--
58--    * Put args in the top locations of the stack.
59--    * Adjust the stack ptr
60--    * Make R1 point to the function closure if necessary.
61--    * Perform the call.
62--
63-- Things to be careful about:
64--
65--    * Don't overwrite stack locations before you have finished with
66--      them (remember you need the function and the as-yet-unmoved
67--      arguments).
68--    * Preferably, generate no code to replace x by x on the stack (a
69--      common situation in tail-recursion).
70--    * Adjust the stack high water mark appropriately.
71--
72-- Treat unboxed locals exactly like literals (above) except use the addr
73-- mode for the local instead of (CLit lit) in the assignment.
74
75cgTailCall fun args
76  = do  { fun_info <- getCgIdInfo fun
77
78        ; if isUnLiftedType (idType fun)
79          then  -- Primitive return
80                ASSERT( null args )
81            do  { fun_amode <- idInfoToAmode fun_info
82                ; performPrimReturn (cgIdInfoArgRep fun_info) fun_amode } 
83
84          else -- Normal case, fun is boxed
85            do  { arg_amodes <- getArgAmodes args
86                ; performTailCall fun_info arg_amodes noStmts }
87        }
88               
89
90-- -----------------------------------------------------------------------------
91-- The guts of a tail-call
92
93performTailCall 
94        :: CgIdInfo             -- The function
95        -> [(CgRep,CmmExpr)]    -- Args
96        -> CmmStmts             -- Pending simultaneous assignments
97                                --  *** GUARANTEED to contain only stack assignments.
98        -> Code
99
100performTailCall fun_info arg_amodes pending_assts
101  | Just join_sp <- maybeLetNoEscape fun_info
102  =        -- A let-no-escape is slightly different, because we
103           -- arrange the stack arguments into pointers and non-pointers
104           -- to make the heap check easier.  The tail-call sequence
105           -- is very similar to returning an unboxed tuple, so we
106           -- share some code.
107     do { (final_sp, arg_assts, live) <- pushUnboxedTuple join_sp arg_amodes
108        ; emitSimultaneously (pending_assts `plusStmts` arg_assts)
109        ; let lbl = enterReturnPtLabel (idUnique (cgIdInfoId fun_info))
110        ; doFinalJump final_sp True $ jumpToLbl lbl (Just live) }
111
112  | otherwise
113  = do  { fun_amode <- idInfoToAmode fun_info
114        ; let assignSt  = CmmAssign nodeReg fun_amode
115              node_asst = oneStmt assignSt
116              node_live = Just [node]
117              (opt_node_asst, opt_node_live)
118                      | nodeMustPointToIt lf_info = (node_asst, node_live)
119                      | otherwise                 = (noStmts, Just [])
120        ; EndOfBlockInfo sp _ <- getEndOfBlockInfo
121
122        ; dflags <- getDynFlags
123        ; case (getCallMethod dflags fun_name fun_has_cafs lf_info (length arg_amodes)) of
124
125            -- Node must always point to things we enter
126            EnterIt -> do
127                { emitSimultaneously (node_asst `plusStmts` pending_assts) 
128                ; let target       = entryCode (closureInfoPtr (CmmReg nodeReg))
129                      enterClosure = stmtC (CmmJump target node_live)
130                      -- If this is a scrutinee
131                      -- let's check if the closure is a constructor
132                      -- so we can directly jump to the alternatives switch
133                      -- statement.
134                      jumpInstr = getEndOfBlockInfo >>=
135                                  maybeSwitchOnCons enterClosure
136                ; doFinalJump sp False jumpInstr }
137   
138            -- A function, but we have zero arguments.  It is already in WHNF,
139            -- so we can just return it. 
140            -- As with any return, Node must point to it.
141            ReturnIt -> do
142                { emitSimultaneously (node_asst `plusStmts` pending_assts)
143                ; doFinalJump sp False $ emitReturnInstr node_live }
144   
145            -- A real constructor.  Don't bother entering it,
146            -- just do the right sort of return instead.
147            -- As with any return, Node must point to it.
148            ReturnCon _ -> do
149                { emitSimultaneously (node_asst `plusStmts` pending_assts)
150                ; doFinalJump sp False $ emitReturnInstr node_live }
151
152            JumpToIt lbl -> do
153                { emitSimultaneously (opt_node_asst `plusStmts` pending_assts)
154                ; doFinalJump sp False $ jumpToLbl lbl opt_node_live }
155   
156            -- A slow function call via the RTS apply routines
157            -- Node must definitely point to the thing
158            SlowCall -> do 
159                {  when (not (null arg_amodes)) $ do
160                   { if (isKnownFun lf_info) 
161                        then tickyKnownCallTooFewArgs
162                        else tickyUnknownCall
163                   ; tickySlowCallPat (map fst arg_amodes) 
164                   }
165
166                ; let (apply_lbl, args, extra_args) 
167                        = constructSlowCall arg_amodes
168
169                ; directCall sp apply_lbl args extra_args node_live
170                        (node_asst `plusStmts` pending_assts)
171
172                }
173   
174            -- A direct function call (possibly with some left-over arguments)
175            DirectEntry lbl arity -> do
176                { if arity == length arg_amodes
177                        then tickyKnownCallExact
178                        else do tickyKnownCallExtraArgs
179                                tickySlowCallPat (map fst (drop arity arg_amodes))
180
181                ; let
182                     -- The args beyond the arity go straight on the stack
183                     (arity_args, extra_args) = splitAt arity arg_amodes
184     
185                ; directCall sp lbl arity_args extra_args opt_node_live
186                        (opt_node_asst `plusStmts` pending_assts)
187                }
188        }
189  where
190    fun_id    = cgIdInfoId fun_info
191    fun_name  = idName fun_id
192    lf_info   = cgIdInfoLF fun_info
193    fun_has_cafs = idCafInfo fun_id
194    untag_node = CmmAssign nodeReg (cmmUntag (CmmReg nodeReg))
195    -- Test if closure is a constructor
196    maybeSwitchOnCons enterClosure eob
197              | EndOfBlockInfo _ (CaseAlts lbl _ _) <- eob,
198                not opt_SccProfilingOn
199                -- we can't shortcut when profiling is on, because we have
200                -- to enter a closure to mark it as "used" for LDV profiling
201              = do { is_constr <- newLabelC
202                   -- Is the pointer tagged?
203                   -- Yes, jump to switch statement
204                   ; stmtC (CmmCondBranch (cmmIsTagged (CmmReg nodeReg)) 
205                                is_constr)
206                   -- No, enter the closure.
207                   ; enterClosure
208                   ; labelC is_constr
209                   ; stmtC (CmmJump (entryCode $
210                               CmmLit (CmmLabel lbl)) (Just [node]))
211                   }
212{-
213              -- This is a scrutinee for a case expression
214              -- so let's see if we can directly inspect the closure
215              | EndOfBlockInfo _ (CaseAlts lbl _ _ _) <- eob
216              = do { no_cons <- newLabelC
217                   -- Both the NCG and gcc optimize away the temp
218                   ; z <- newTemp  wordRep
219                   ; stmtC (CmmAssign z tag_expr)
220                   ; let tag = CmmReg z
221                   -- Is the closure a cons?
222                   ; stmtC (CmmCondBranch (cond1 tag) no_cons)
223                   ; stmtC (CmmCondBranch (cond2 tag) no_cons)
224                   -- Yes, jump to switch statement
225                   ; stmtC (CmmJump (CmmLit (CmmLabel lbl)))
226                   ; labelC no_cons
227                   -- No, enter the closure.
228                   ; enterClosure
229                   }
230-}
231              -- No case expression involved, enter the closure.
232              | otherwise
233              = do { stmtC untag_node
234                   ; enterClosure
235                   }
236        where
237          --cond1 tag  = cmmULtWord tag lowCons
238          -- More efficient than the above?
239{-
240          tag_expr   = cmmGetClosureType (CmmReg nodeReg)
241          cond1 tag  = cmmEqWord tag (CmmLit (mkIntCLit 0))
242          cond2 tag  = cmmUGtWord tag highCons
243          lowCons    = CmmLit (mkIntCLit 1)
244            -- CONSTR
245          highCons   = CmmLit (mkIntCLit 8)
246            -- CONSTR_NOCAF_STATIC (from ClosureType.h)
247-}
248
249directCall :: VirtualSpOffset -> CLabel -> [(CgRep, CmmExpr)]
250           -> [(CgRep, CmmExpr)] -> Maybe [GlobalReg] -> CmmStmts
251           -> Code
252directCall sp lbl args extra_args live_node assts = do
253  let
254        -- First chunk of args go in registers
255        (reg_arg_amodes, stk_args) = assignCallRegs args
256     
257        -- Any "extra" arguments are placed in frames on the
258        -- stack after the other arguments.
259        slow_stk_args = slowArgs extra_args
260
261        reg_assts = assignToRegs reg_arg_amodes
262        live_args = map snd reg_arg_amodes
263        live_regs = Just $ (fromMaybe [] live_node) ++ live_args
264  --
265  (final_sp, stk_assts) <- mkStkAmodes sp (stk_args ++ slow_stk_args)
266  emitSimultaneously $ reg_assts `plusStmts` stk_assts `plusStmts` assts
267  doFinalJump final_sp False $ jumpToLbl lbl live_regs
268
269-- -----------------------------------------------------------------------------
270-- The final clean-up before we do a jump at the end of a basic block.
271-- This code is shared by tail-calls and returns.
272
273doFinalJump :: VirtualSpOffset -> Bool -> Code -> Code 
274doFinalJump final_sp is_let_no_escape jump_code
275  = do  { -- Adjust the high-water mark if necessary
276          adjustStackHW final_sp
277
278        -- Push a return address if necessary (after the assignments
279        -- above, in case we clobber a live stack location)
280        --
281        -- DONT push the return address when we're about to jump to a
282        -- let-no-escape: the final tail call in the let-no-escape
283        -- will do this.
284        ; eob <- getEndOfBlockInfo
285        ; whenC (not is_let_no_escape) (pushReturnAddress eob)
286
287            -- Final adjustment of Sp/Hp
288        ; adjustSpAndHp final_sp
289
290            -- and do the jump
291        ; jump_code }
292
293-- ----------------------------------------------------------------------------
294-- A general return (just a special case of doFinalJump, above)
295
296performReturn :: Code   -- The code to execute to actually do the return
297              -> Code
298
299performReturn finish_code
300  = do  { EndOfBlockInfo args_sp _sequel <- getEndOfBlockInfo
301        ; doFinalJump args_sp False finish_code }
302
303-- ----------------------------------------------------------------------------
304-- Primitive Returns
305-- Just load the return value into the right register, and return.
306
307performPrimReturn :: CgRep -> CmmExpr -> Code
308
309-- non-void return value
310performPrimReturn rep amode | not (isVoidArg rep)
311  = do { stmtC (CmmAssign ret_reg amode)
312       ; performReturn $ emitReturnInstr live_regs }
313  where
314    -- careful here as 'dataReturnConvPrim' will panic if given a Void rep
315    ret_reg@(CmmGlobal r) = dataReturnConvPrim rep
316    live_regs = Just [r]
317
318-- void return value
319performPrimReturn _ _
320  = performReturn $ emitReturnInstr (Just [])
321
322
323-- ---------------------------------------------------------------------------
324-- Unboxed tuple returns
325
326-- These are a bit like a normal tail call, except that:
327--
328--   - The tail-call target is an info table on the stack
329--
330--   - We separate stack arguments into pointers and non-pointers,
331--     to make it easier to leave things in a sane state for a heap check.
332--     This is OK because we can never partially-apply an unboxed tuple,
333--     unlike a function.  The same technique is used when calling
334--     let-no-escape functions, because they also can't be partially
335--     applied.
336
337returnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code
338returnUnboxedTuple amodes
339  = do  { (EndOfBlockInfo args_sp _sequel) <- getEndOfBlockInfo
340        ; tickyUnboxedTupleReturn (length amodes)
341        ; (final_sp, assts, live_regs) <- pushUnboxedTuple args_sp amodes
342        ; emitSimultaneously assts
343        ; doFinalJump final_sp False $ emitReturnInstr (Just live_regs) }
344
345pushUnboxedTuple :: VirtualSpOffset             -- Sp at which to start pushing
346                 -> [(CgRep, CmmExpr)]          -- amodes of the components
347                 -> FCode (VirtualSpOffset,     -- final Sp
348                           CmmStmts,            -- assignments (regs+stack)
349                           [GlobalReg])         -- registers used (liveness)
350
351pushUnboxedTuple sp [] 
352  = return (sp, noStmts, [])
353pushUnboxedTuple sp amodes
354  = do  { let   (reg_arg_amodes, stk_arg_amodes) = assignReturnRegs amodes
355                live_regs = map snd reg_arg_amodes
356       
357                -- separate the rest of the args into pointers and non-pointers
358                (ptr_args, nptr_args) = separateByPtrFollowness stk_arg_amodes
359                reg_arg_assts = assignToRegs reg_arg_amodes
360               
361            -- push ptrs, then nonptrs, on the stack
362        ; (ptr_sp,   ptr_assts)  <- mkStkAmodes sp ptr_args
363        ; (final_sp, nptr_assts) <- mkStkAmodes ptr_sp nptr_args
364
365        ; returnFC (final_sp,
366                    reg_arg_assts `plusStmts` ptr_assts `plusStmts` nptr_assts,
367                    live_regs) }
368   
369                 
370-- -----------------------------------------------------------------------------
371-- Returning unboxed tuples.  This is mainly to support _ccall_GC_, where
372-- we want to do things in a slightly different order to normal:
373--
374--              - push return address
375--              - adjust stack pointer
376--              - r = call(args...)
377--              - assign regs for unboxed tuple (usually just R1 = r)
378--              - return to continuation
379--
380-- The return address (i.e. stack frame) must be on the stack before
381-- doing the call in case the call ends up in the garbage collector.
382--
383-- Sadly, the information about the continuation is lost after we push it
384-- (in order to avoid pushing it again), so we end up doing a needless
385-- indirect jump (ToDo).
386
387ccallReturnUnboxedTuple :: [(CgRep, CmmExpr)] -> Code -> Code
388ccallReturnUnboxedTuple amodes before_jump
389  = do  { eob@(EndOfBlockInfo args_sp _) <- getEndOfBlockInfo
390
391        -- Push a return address if necessary
392        ; pushReturnAddress eob
393        ; setEndOfBlockInfo (EndOfBlockInfo args_sp OnStack)
394            (do { adjustSpAndHp args_sp
395                ; before_jump
396                ; returnUnboxedTuple amodes })
397    }
398
399-- -----------------------------------------------------------------------------
400-- Calling an out-of-line primop
401
402tailCallPrimOp :: PrimOp -> [StgArg] -> Code
403tailCallPrimOp op
404 = tailCallPrim (mkRtsPrimOpLabel op)
405
406tailCallPrimCall :: PrimCall -> [StgArg] -> Code
407tailCallPrimCall primcall
408 = tailCallPrim (mkPrimCallLabel primcall)
409
410tailCallPrim :: CLabel -> [StgArg] -> Code
411tailCallPrim lbl args
412 = do   {       -- We're going to perform a normal-looking tail call,
413                -- except that *all* the arguments will be in registers.
414                -- Hence the ASSERT( null leftovers )
415          arg_amodes <- getArgAmodes args
416        ; let (arg_regs, leftovers) = assignPrimOpCallRegs arg_amodes
417              live_regs = Just $ map snd arg_regs
418              jump_to_primop = jumpToLbl lbl live_regs
419
420        ; ASSERT(null leftovers) -- no stack-resident args
421          emitSimultaneously (assignToRegs arg_regs)
422
423        ; EndOfBlockInfo args_sp _ <- getEndOfBlockInfo
424        ; doFinalJump args_sp False jump_to_primop }
425
426-- -----------------------------------------------------------------------------
427-- Return Addresses
428
429-- We always push the return address just before performing a tail call
430-- or return.  The reason we leave it until then is because the stack
431-- slot that the return address is to go into might contain something
432-- useful.
433--
434-- If the end of block info is 'CaseAlts', then we're in the scrutinee of a
435-- case expression and the return address is still to be pushed.
436--
437-- There are cases where it doesn't look necessary to push the return
438-- address: for example, just before doing a return to a known
439-- continuation.  However, the continuation will expect to find the
440-- return address on the stack in case it needs to do a heap check.
441
442pushReturnAddress :: EndOfBlockInfo -> Code
443
444pushReturnAddress (EndOfBlockInfo args_sp (CaseAlts lbl _ _))
445  = do  { sp_rel <- getSpRelOffset args_sp
446        ; stmtC (CmmStore sp_rel (mkLblExpr lbl)) }
447
448pushReturnAddress _ = nopC
449
450-- -----------------------------------------------------------------------------
451-- Misc.
452
453-- Passes no argument to the destination procedure
454jumpToLbl :: CLabel -> Maybe [GlobalReg] -> Code
455jumpToLbl lbl live = stmtC $ CmmJump (CmmLit $ CmmLabel lbl) live
456
457assignToRegs :: [(CmmExpr, GlobalReg)] -> CmmStmts
458assignToRegs reg_args
459  = mkStmts [ CmmAssign (CmmGlobal reg_id) expr
460            | (expr, reg_id) <- reg_args ] 
461\end{code}
462
463
464%************************************************************************
465%*                                                                      *
466\subsection[CgStackery-adjust]{Adjusting the stack pointers}
467%*                                                                      *
468%************************************************************************
469
470This function adjusts the stack and heap pointers just before a tail
471call or return.  The stack pointer is adjusted to its final position
472(i.e. to point to the last argument for a tail call, or the activation
473record for a return).  The heap pointer may be moved backwards, in
474cases where we overallocated at the beginning of the basic block (see
475CgCase.lhs for discussion).
476
477These functions {\em do not} deal with high-water-mark adjustment.
478That's done by functions which allocate stack space.
479
480\begin{code}
481adjustSpAndHp :: VirtualSpOffset        -- New offset for Arg stack ptr
482              -> Code
483adjustSpAndHp newRealSp
484  = do  { -- Adjust stack, if necessary.
485          -- NB: the conditional on the monad-carried realSp
486          --     is out of line (via codeOnly), to avoid a black hole
487        ; new_sp <- getSpRelOffset newRealSp
488        ; checkedAbsC (CmmAssign spReg new_sp)  -- Will generate no code in the case
489        ; setRealSp newRealSp                   -- where realSp==newRealSp
490
491          -- Adjust heap.  The virtual heap pointer may be less than the real Hp
492          -- because the latter was advanced to deal with the worst-case branch
493          -- of the code, and we may be in a better-case branch.  In that case,
494          -- move the real Hp *back* and retract some ticky allocation count.
495        ; hp_usg <- getHpUsage
496        ; let rHp = realHp hp_usg
497              vHp = virtHp hp_usg
498        ; new_hp <- getHpRelOffset vHp
499        ; checkedAbsC (CmmAssign hpReg new_hp)  -- Generates nothing when vHp==rHp
500        ; tickyAllocHeap (vHp - rHp)            -- ...ditto
501        ; setRealHp vHp
502        }
503\end{code}
Note: See TracBrowser for help on using the browser.