root/rts/Apply.cmm

Revision 50de6034343abc93a7b01daccff34121042c0e7c, 10.2 KB (checked in by Simon Marlow <marlowsd@…>, 6 months ago)

Make profiling work with multiple capabilities (+RTS -N)

This means that both time and heap profiling work for parallel
programs. Main internal changes:

  • CCCS is no longer a global variable; it is now another pseudo-register in the StgRegTable? struct. Thus every Capability has its own CCCS.
  • There is a new built-in CCS called "IDLE", which records ticks for Capabilities in the idle state. If you profile a single-threaded program with +RTS -N2, you'll see about 50% of time in "IDLE".
  • There is appropriate locking in rts/Profiling.c to protect the shared cost-centre-stack data structures.

This patch does enough to get it working, I have cut one big corner:
the cost-centre-stack data structure is still shared amongst all
Capabilities, which means that multiple Capabilities will race when
updating the "allocations" and "entries" fields of a CCS. Not only
does this give unpredictable results, but it runs very slowly due to
cache line bouncing.

It is strongly recommended that you use -fno-prof-count-entries to
disable the "entries" count when profiling parallel programs. (I shall
add a note to this effect to the docs).

  • Property mode set to 100644
Line 
1/* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow 2004
4 *
5 * Application-related bits.
6 *
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC.  It is compiled by GHC directly.  For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
10 *
11 * -------------------------------------------------------------------------- */
12
13#include "Cmm.h"
14
15/* ----------------------------------------------------------------------------
16 * Evaluate a closure and return it.
17 *
18 * There isn't an info table / return address version of stg_ap_0, because
19 * everything being returned is guaranteed evaluated, so it would be a no-op.
20 */
21
22STRING(stg_ap_0_ret_str,"stg_ap_0_ret... ")
23
24stg_ap_0_fast
25{
26    // fn is in R1, no args on the stack
27
28    IF_DEBUG(apply,
29        foreign "C" debugBelch(stg_ap_0_ret_str) [R1];
30        foreign "C" printClosure(R1 "ptr") [R1]);
31
32    IF_DEBUG(sanity,
33        foreign "C" checkStackFrame(Sp "ptr") [R1]);
34
35    ENTER();
36}
37
38/* -----------------------------------------------------------------------------
39   Entry Code for a PAP.
40
41   This entry code is *only* called by one of the stg_ap functions.
42   On entry: Sp points to the remaining arguments on the stack.  If
43   the stack check fails, we can just push the PAP on the stack and
44   return to the scheduler.
45
46   On entry: R1 points to the PAP.  The rest of the function's
47   arguments (apart from those that are already in the PAP) are on the
48   stack, starting at Sp(0).  R2 contains an info table which
49   describes these arguments, which is used in the event that the
50   stack check in the entry code below fails.  The info table is
51   currently one of the stg_ap_*_ret family, as this code is always
52   entered from those functions.
53
54   The idea is to copy the chunk of stack from the PAP object onto the
55   stack / into registers, and enter the function.
56   -------------------------------------------------------------------------- */
57
58INFO_TABLE(stg_PAP,/*special layout*/0,0,PAP,"PAP","PAP")
59{  foreign "C" barf("PAP object entered!") never returns; }
60   
61stg_PAP_apply
62{
63  W_ Words;
64  W_ pap;
65   
66  pap = R1;
67
68  Words = TO_W_(StgPAP_n_args(pap));
69
70  //
71  // Check for stack overflow and bump the stack pointer.
72  // We have a hand-rolled stack check fragment here, because none of
73  // the canned ones suit this situation.
74  //
75  if ((Sp - WDS(Words)) < SpLim) {
76      // there is a return address in R2 in the event of a
77      // stack check failure.  The various stg_apply functions arrange
78      // this before calling stg_PAP_entry.
79      Sp_adj(-1);
80      Sp(0) = R2;
81      jump stg_gc_unpt_r1;
82  }
83  Sp_adj(-Words);
84
85  // profiling
86  TICK_ENT_PAP();
87  LDV_ENTER(pap);
88#ifdef PROFILING
89  foreign "C" enterFunCCS(BaseReg "ptr", StgHeader_ccs(pap) "ptr");
90#endif
91
92  // Reload the stack
93  W_ i;
94  W_ p;
95  p = pap + SIZEOF_StgHeader + OFFSET_StgPAP_payload;
96  i = 0;
97for:
98  if (i < Words) {
99    Sp(i) = W_[p];
100    p = p + WDS(1);
101    i = i + 1;
102    goto for;
103  }
104
105  R1 = StgPAP_fun(pap);
106
107/* DEBUGGING CODE, ensures that arity 1 and 2 functions are entered tagged
108  if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 1 ) {
109    if (GETTAG(R1)!=1) {
110        W_[0]=1;
111    }
112  }
113
114  if (TO_W_(StgFunInfoExtra_arity(%FUN_INFO(%INFO_PTR(UNTAG(R1))))) == 2 ) {
115    if (GETTAG(R1)!=2) {
116        W_[0]=1;
117    }
118  }
119*/
120
121  // Off we go!
122  TICK_ENT_VIA_NODE();
123
124#ifdef NO_ARG_REGS
125  jump %GET_ENTRY(UNTAG(R1));
126#else
127      W_ info;
128      info = %GET_FUN_INFO(UNTAG(R1));
129      W_ type;
130      type = TO_W_(StgFunInfoExtra_fun_type(info));
131      if (type == ARG_GEN) {
132          jump StgFunInfoExtra_slow_apply(info);
133      }
134      if (type == ARG_GEN_BIG) {
135          jump StgFunInfoExtra_slow_apply(info);
136      }
137      if (type == ARG_BCO) {
138          Sp_adj(-2);
139          Sp(1) = R1;
140          Sp(0) = stg_apply_interp_info;
141          jump stg_yield_to_interpreter;
142      }
143      jump W_[stg_ap_stack_entries +
144                WDS(TO_W_(StgFunInfoExtra_fun_type(info)))];
145#endif
146}
147
148/* -----------------------------------------------------------------------------
149   Entry Code for an AP (a PAP with arity zero).
150
151   The entry code is very similar to a PAP, except there are no
152   further arguments on the stack to worry about, so the stack check
153   is simpler.  We must also push an update frame on the stack before
154   applying the function.
155   -------------------------------------------------------------------------- */
156
157INFO_TABLE(stg_AP,/*special layout*/0,0,AP,"AP","AP")
158{
159  W_ Words;
160  W_ ap;
161   
162  ap = R1;
163 
164  Words = TO_W_(StgAP_n_args(ap));
165
166  /*
167   * Check for stack overflow.  IMPORTANT: use a _NP check here,
168   * because if the check fails, we might end up blackholing this very
169   * closure, in which case we must enter the blackhole on return rather
170   * than continuing to evaluate the now-defunct closure.
171   */
172  STK_CHK_NP(WDS(Words) + SIZEOF_StgUpdateFrame);
173
174  PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1);
175  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words);
176
177  TICK_ENT_AP();
178  LDV_ENTER(ap);
179  ENTER_CCS_THUNK(ap);
180
181  // Reload the stack
182  W_ i;
183  W_ p;
184  p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload;
185  i = 0;
186for:
187  if (i < Words) {
188    Sp(i) = W_[p];
189    p = p + WDS(1);
190    i = i + 1;
191    goto for;
192  }
193
194  R1 = StgAP_fun(ap);
195
196  // Off we go!
197  TICK_ENT_VIA_NODE();
198
199#ifdef NO_ARG_REGS
200  jump %GET_ENTRY(UNTAG(R1));
201#else
202      W_ info;
203      info = %GET_FUN_INFO(UNTAG(R1));
204      W_ type;
205      type = TO_W_(StgFunInfoExtra_fun_type(info));
206      if (type == ARG_GEN) {
207          jump StgFunInfoExtra_slow_apply(info);
208      }
209      if (type == ARG_GEN_BIG) {
210          jump StgFunInfoExtra_slow_apply(info);
211      }
212      if (type == ARG_BCO) {
213          Sp_adj(-2);
214          Sp(1) = R1;
215          Sp(0) = stg_apply_interp_info;
216          jump stg_yield_to_interpreter;
217      }
218      jump W_[stg_ap_stack_entries +
219                WDS(TO_W_(StgFunInfoExtra_fun_type(info)))];
220#endif
221}
222
223/* AP_NOUPD is exactly like AP, except that no update frame is pushed.
224   Use for thunks that are guaranteed to be entered once only, such as
225   those generated by the byte-code compiler for inserting breakpoints. */
226
227INFO_TABLE(stg_AP_NOUPD,/*special layout*/0,0,AP,"AP_NOUPD","AP_NOUPD")
228{
229  W_ Words;
230  W_ ap;
231   
232  ap = R1;
233 
234  Words = TO_W_(StgAP_n_args(ap));
235
236  /*
237   * Check for stack overflow.  IMPORTANT: use a _NP check here,
238   * because if the check fails, we might end up blackholing this very
239   * closure, in which case we must enter the blackhole on return rather
240   * than continuing to evaluate the now-defunct closure.
241   */
242  STK_CHK_NP(WDS(Words));
243  Sp = Sp - WDS(Words);
244
245  TICK_ENT_AP();
246  LDV_ENTER(ap);
247  ENTER_CCS_THUNK(ap);
248
249  // Reload the stack
250  W_ i;
251  W_ p;
252  p = ap + SIZEOF_StgHeader + OFFSET_StgAP_payload;
253  i = 0;
254for:
255  if (i < Words) {
256    Sp(i) = W_[p];
257    p = p + WDS(1);
258    i = i + 1;
259    goto for;
260  }
261
262  R1 = StgAP_fun(ap);
263
264  // Off we go!
265  TICK_ENT_VIA_NODE();
266
267#ifdef NO_ARG_REGS
268  jump %GET_ENTRY(UNTAG(R1));
269#else
270      W_ info;
271      info = %GET_FUN_INFO(UNTAG(R1));
272      W_ type;
273      type = TO_W_(StgFunInfoExtra_fun_type(info));
274      if (type == ARG_GEN) {
275          jump StgFunInfoExtra_slow_apply(info);
276      }
277      if (type == ARG_GEN_BIG) {
278          jump StgFunInfoExtra_slow_apply(info);
279      }
280      if (type == ARG_BCO) {
281          Sp_adj(-2);
282          Sp(1) = R1;
283          Sp(0) = stg_apply_interp_info;
284          jump stg_yield_to_interpreter;
285      }
286      jump W_[stg_ap_stack_entries +
287                WDS(TO_W_(StgFunInfoExtra_fun_type(info)))];
288#endif
289}
290
291/* -----------------------------------------------------------------------------
292   Entry Code for an AP_STACK.
293
294   Very similar to a PAP and AP.  The layout is the same as PAP
295   and AP, except that the payload is a chunk of stack instead of
296   being described by the function's info table.  Like an AP,
297   there are no further arguments on the stack to worry about.
298   However, the function closure (ap->fun) does not necessarily point
299   directly to a function, so we have to enter it using stg_ap_0.
300   -------------------------------------------------------------------------- */
301
302INFO_TABLE(stg_AP_STACK,/*special layout*/0,0,AP_STACK,"AP_STACK","AP_STACK")
303{
304  W_ Words;
305  W_ ap;
306
307  ap = R1;
308 
309  Words = StgAP_STACK_size(ap);
310
311  /*
312   * Check for stack overflow.  IMPORTANT: use a _NP check here,
313   * because if the check fails, we might end up blackholing this very
314   * closure, in which case we must enter the blackhole on return rather
315   * than continuing to evaluate the now-defunct closure.
316   */
317  STK_CHK_NP(WDS(Words) + SIZEOF_StgUpdateFrame + WDS(AP_STACK_SPLIM));
318  /* ensure there is at least AP_STACK_SPLIM words of headroom available
319   * after unpacking the AP_STACK. See bug #1466 */
320
321  PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1);
322  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(Words);
323
324  TICK_ENT_AP();
325  LDV_ENTER(ap);
326  ENTER_CCS_THUNK(ap);
327
328  // Reload the stack
329  W_ i;
330  W_ p;
331  p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload;
332  i = 0;
333for:
334  if (i < Words) {
335    Sp(i) = W_[p];
336    p = p + WDS(1);
337    i = i + 1;
338    goto for;
339  }
340
341  // Off we go!
342  TICK_ENT_VIA_NODE();
343
344  R1 = StgAP_STACK_fun(ap);
345
346  ENTER();
347}
348
349/* -----------------------------------------------------------------------------
350   AP_STACK_NOUPD - exactly like AP_STACK, but doesn't push an update frame.
351   -------------------------------------------------------------------------- */
352
353INFO_TABLE(stg_AP_STACK_NOUPD,/*special layout*/0,0,AP_STACK,
354                                        "AP_STACK_NOUPD","AP_STACK_NOUPD")
355{
356  W_ Words;
357  W_ ap;
358
359  ap = R1;
360 
361  Words = StgAP_STACK_size(ap);
362
363  /*
364   * Check for stack overflow.  IMPORTANT: use a _NP check here,
365   * because if the check fails, we might end up blackholing this very
366   * closure, in which case we must enter the blackhole on return rather
367   * than continuing to evaluate the now-defunct closure.
368   */
369  STK_CHK_NP(WDS(Words) + WDS(AP_STACK_SPLIM));
370  /* ensure there is at least AP_STACK_SPLIM words of headroom available
371   * after unpacking the AP_STACK. See bug #1466 */
372
373  Sp = Sp - WDS(Words);
374
375  TICK_ENT_AP();
376  LDV_ENTER(ap);
377  ENTER_CCS_THUNK(ap);
378
379  // Reload the stack
380  W_ i;
381  W_ p;
382  p = ap + SIZEOF_StgHeader + OFFSET_StgAP_STACK_payload;
383  i = 0;
384for:
385  if (i < Words) {
386    Sp(i) = W_[p];
387    p = p + WDS(1);
388    i = i + 1;
389    goto for;
390  }
391
392  // Off we go!
393  TICK_ENT_VIA_NODE();
394
395  R1 = StgAP_STACK_fun(ap);
396
397  ENTER();
398}
Note: See TracBrowser for help on using the browser.