root/rts/StgStdThunks.cmm

Revision 50de6034343abc93a7b01daccff34121042c0e7c, 10.0 KB (checked in by Simon Marlow <marlowsd@…>, 6 months ago)

Make profiling work with multiple capabilities (+RTS -N)

This means that both time and heap profiling work for parallel
programs. Main internal changes:

  • CCCS is no longer a global variable; it is now another pseudo-register in the StgRegTable? struct. Thus every Capability has its own CCCS.
  • There is a new built-in CCS called "IDLE", which records ticks for Capabilities in the idle state. If you profile a single-threaded program with +RTS -N2, you'll see about 50% of time in "IDLE".
  • There is appropriate locking in rts/Profiling.c to protect the shared cost-centre-stack data structures.

This patch does enough to get it working, I have cut one big corner:
the cost-centre-stack data structure is still shared amongst all
Capabilities, which means that multiple Capabilities will race when
updating the "allocations" and "entries" fields of a CCS. Not only
does this give unpredictable results, but it runs very slowly due to
cache line bouncing.

It is strongly recommended that you use -fno-prof-count-entries to
disable the "entries" count when profiling parallel programs. (I shall
add a note to this effect to the docs).

  • Property mode set to 100644
Line 
1/* -----------------------------------------------------------------------------
2 *
3 * (c) The University of Glasgow, 1998-2004
4 *
5 * Canned "Standard Form" Thunks
6 *
7 * This file is written in a subset of C--, extended with various
8 * features specific to GHC.  It is compiled by GHC directly.  For the
9 * syntax of .cmm files, see the parser in ghc/compiler/cmm/CmmParse.y.
10 *
11 * ---------------------------------------------------------------------------*/
12
13#include "Cmm.h"
14
15/* -----------------------------------------------------------------------------
16   The code for a thunk that simply extracts a field from a
17   single-constructor datatype depends only on the offset of the field
18   to be selected.
19
20   Here we define some canned "selector" thunks that do just that; any
21   selector thunk appearing in a program will refer to one of these
22   instead of being compiled independently.
23
24   The garbage collector spots selector thunks and reduces them if
25   possible, in order to avoid space leaks resulting from lazy pattern
26   matching.
27   -------------------------------------------------------------------------- */
28
29#define WITHUPD_FRAME_SIZE  (SIZEOF_StgUpdateFrame + SIZEOF_StgHeader)
30#define NOUPD_FRAME_SIZE    (SIZEOF_StgHeader)
31
32#ifdef PROFILING
33#define SAVE_CCCS(fs)   StgHeader_ccs(Sp-fs) = CCCS
34#define GET_SAVED_CCCS  CCCS = StgHeader_ccs(Sp)
35#define RET_PARAMS      W_ unused1, W_ unused2
36#else
37#define SAVE_CCCS(fs)   /* empty */
38#define GET_SAVED_CCCS  /* empty */
39#define RET_PARAMS
40#endif
41
42/*
43 * TODO: On return, we can use a more efficient
44 *       untagging (we know the constructor tag).
45 *
46 * When entering stg_sel_#_upd, we know R1 points to its closure,
47 * so it's untagged.
48 * The payload might be a thunk or a constructor,
49 * so we enter it.
50 *
51 * When returning, we know for sure it is a constructor,
52 * so we untag it before accessing the field.
53 *
54 */
55#ifdef PROFILING
56// When profiling, we cannot shortcut by checking the tag,
57// because LDV profiling relies on entering closures to mark them as
58// "used".
59#define SEL_ENTER(offset)                       \
60      R1 = UNTAG(R1);                           \
61      jump %GET_ENTRY(R1);
62#else
63#define SEL_ENTER(offset)                               \
64      if (GETTAG(R1) != 0) {                            \
65          jump RET_LBL(stg_sel_ret_##offset##_upd);     \
66      }                                                 \
67      jump %GET_ENTRY(R1);
68#endif
69
70#define SELECTOR_CODE_UPD(offset)                                       \
71  INFO_TABLE_RET(stg_sel_ret_##offset##_upd, RET_SMALL, RET_PARAMS)     \
72  {                                                                     \
73      R1 = StgClosure_payload(UNTAG(R1),offset);                        \
74      GET_SAVED_CCCS;                                                   \
75      Sp = Sp + SIZEOF_StgHeader;                                       \
76      ENTER();                                                          \
77  }                                                                     \
78                                                                        \
79  INFO_TABLE_SELECTOR(stg_sel_##offset##_upd, offset, THUNK_SELECTOR, "stg_sel_upd", "stg_sel_upd") \
80  {                                                                     \
81      TICK_ENT_DYN_THK();                                               \
82      STK_CHK_NP(WITHUPD_FRAME_SIZE);                                   \
83      UPD_BH_UPDATABLE();                                               \
84      LDV_ENTER(R1);                                                    \
85      PUSH_UPD_FRAME(Sp - SIZEOF_StgUpdateFrame, R1);                   \
86      ENTER_CCS_THUNK(R1);                                              \
87      SAVE_CCCS(WITHUPD_FRAME_SIZE);                                    \
88      W_[Sp-WITHUPD_FRAME_SIZE] = stg_sel_ret_##offset##_upd_info;      \
89      Sp = Sp - WITHUPD_FRAME_SIZE;                                     \
90      R1 = StgThunk_payload(R1,0);                                      \
91      SEL_ENTER(offset);                                                \
92  }
93  /* NOTE: no need to ENTER() here, we know the closure cannot evaluate to a function,
94     because we're going to do a field selection on the result. */
95
96SELECTOR_CODE_UPD(0)
97SELECTOR_CODE_UPD(1)
98SELECTOR_CODE_UPD(2)
99SELECTOR_CODE_UPD(3)
100SELECTOR_CODE_UPD(4)
101SELECTOR_CODE_UPD(5)
102SELECTOR_CODE_UPD(6)
103SELECTOR_CODE_UPD(7)
104SELECTOR_CODE_UPD(8)
105SELECTOR_CODE_UPD(9)
106SELECTOR_CODE_UPD(10)
107SELECTOR_CODE_UPD(11)
108SELECTOR_CODE_UPD(12)
109SELECTOR_CODE_UPD(13)
110SELECTOR_CODE_UPD(14)
111SELECTOR_CODE_UPD(15)
112
113#define SELECTOR_CODE_NOUPD(offset) \
114  INFO_TABLE_RET(stg_sel_ret_##offset##_noupd, RET_SMALL, RET_PARAMS)   \
115  {                                                                     \
116      R1 = StgClosure_payload(UNTAG(R1),offset);                        \
117      GET_SAVED_CCCS;                                                   \
118      Sp = Sp + SIZEOF_StgHeader;                                       \
119      ENTER();                                                          \
120  }                                                                     \
121                                                                        \
122  INFO_TABLE_SELECTOR(stg_sel_##offset##_noupd, offset, THUNK_SELECTOR, "stg_sel_noupd", "stg_sel_noupd")\
123  {                                                                     \
124      TICK_ENT_DYN_THK();                                               \
125      STK_CHK_NP(NOUPD_FRAME_SIZE);                                     \
126      UPD_BH_SINGLE_ENTRY();                                            \
127      LDV_ENTER(R1);                                                    \
128      TICK_UPDF_OMITTED();                                              \
129      ENTER_CCS_THUNK(R1);                                              \
130      SAVE_CCCS(NOUPD_FRAME_SIZE);                                      \
131      W_[Sp-NOUPD_FRAME_SIZE] = stg_sel_ret_##offset##_noupd_info;      \
132      Sp = Sp - NOUPD_FRAME_SIZE;                                       \
133      R1 = StgThunk_payload(R1,0);                                      \
134      if (GETTAG(R1) != 0) {                                            \
135          jump RET_LBL(stg_sel_ret_##offset##_noupd);                   \
136      }                                                                 \
137      jump %GET_ENTRY(R1);                                              \
138  }
139
140SELECTOR_CODE_NOUPD(0)
141SELECTOR_CODE_NOUPD(1)
142SELECTOR_CODE_NOUPD(2)
143SELECTOR_CODE_NOUPD(3)
144SELECTOR_CODE_NOUPD(4)
145SELECTOR_CODE_NOUPD(5)
146SELECTOR_CODE_NOUPD(6)
147SELECTOR_CODE_NOUPD(7)
148SELECTOR_CODE_NOUPD(8)
149SELECTOR_CODE_NOUPD(9)
150SELECTOR_CODE_NOUPD(10)
151SELECTOR_CODE_NOUPD(11)
152SELECTOR_CODE_NOUPD(12)
153SELECTOR_CODE_NOUPD(13)
154SELECTOR_CODE_NOUPD(14)
155SELECTOR_CODE_NOUPD(15)
156
157/* -----------------------------------------------------------------------------
158   Apply thunks
159
160   An apply thunk is a thunk of the form
161       
162                let z = [x1...xn] \u x1...xn
163                in ...
164
165   We pre-compile some of these because the code is always the same.
166
167   These have to be independent of the update frame size, so the code
168   works when profiling etc.
169   -------------------------------------------------------------------------- */
170
171/* stg_ap_1_upd_info is a bit redundant, but there appears to be a bug
172 * in the compiler that means stg_ap_1 is generated occasionally (ToDo)
173 */
174
175INFO_TABLE(stg_ap_1_upd,1,0,THUNK_1_0,"stg_ap_1_upd_info","stg_ap_1_upd_info")
176{
177  TICK_ENT_DYN_THK();
178  STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(1));
179  UPD_BH_UPDATABLE();
180  LDV_ENTER(R1);
181  ENTER_CCS_THUNK(R1);
182  PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
183  R1 = StgThunk_payload(R1,0);
184  Sp = Sp - SIZEOF_StgUpdateFrame;
185  jump stg_ap_0_fast;
186}
187
188INFO_TABLE(stg_ap_2_upd,2,0,THUNK_2_0,"stg_ap_2_upd_info","stg_ap_2_upd_info")
189{
190  TICK_ENT_DYN_THK();
191  STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(2));
192  UPD_BH_UPDATABLE();
193  LDV_ENTER(R1);
194  ENTER_CCS_THUNK(R1);
195  PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
196  W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,1);
197  R1 = StgThunk_payload(R1,0);
198  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(1);
199  Sp_adj(-1); // for stg_ap_*_ret
200  TICK_UNKNOWN_CALL();
201  TICK_SLOW_CALL_p();
202  jump RET_LBL(stg_ap_p);
203}
204
205INFO_TABLE(stg_ap_3_upd,3,0,THUNK,"stg_ap_3_upd_info","stg_ap_3_upd_info")
206{
207  TICK_ENT_DYN_THK();
208  STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(3));
209  UPD_BH_UPDATABLE();
210  LDV_ENTER(R1);
211  ENTER_CCS_THUNK(R1);
212  PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
213  W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,2);
214  W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,1);
215  R1 = StgThunk_payload(R1,0);
216  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(2);
217  Sp_adj(-1); // for stg_ap_*_ret
218  TICK_UNKNOWN_CALL();
219  TICK_SLOW_CALL_pp();
220  jump RET_LBL(stg_ap_pp);
221}
222
223INFO_TABLE(stg_ap_4_upd,4,0,THUNK,"stg_ap_4_upd_info","stg_ap_4_upd_info")
224{
225  TICK_ENT_DYN_THK();
226  STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(4));
227  UPD_BH_UPDATABLE();
228  LDV_ENTER(R1);
229  ENTER_CCS_THUNK(R1);
230  PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
231  W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,3);
232  W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,2);
233  W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,1);
234  R1 = StgThunk_payload(R1,0);
235  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(3);
236  Sp_adj(-1); // for stg_ap_*_ret
237  TICK_UNKNOWN_CALL();
238  TICK_SLOW_CALL_ppp();
239  jump RET_LBL(stg_ap_ppp);
240}
241
242INFO_TABLE(stg_ap_5_upd,5,0,THUNK,"stg_ap_5_upd_info","stg_ap_5_upd_info")
243{
244  TICK_ENT_DYN_THK();
245  STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(5));
246  UPD_BH_UPDATABLE();
247  LDV_ENTER(R1);
248  ENTER_CCS_THUNK(R1);
249  PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
250  W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,4);
251  W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,3);
252  W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,2);
253  W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,1);
254  R1 = StgThunk_payload(R1,0);
255  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(4);
256  Sp_adj(-1); // for stg_ap_*_ret
257  TICK_UNKNOWN_CALL();
258  TICK_SLOW_CALL_pppp();
259  jump RET_LBL(stg_ap_pppp);
260}
261
262INFO_TABLE(stg_ap_6_upd,6,0,THUNK,"stg_ap_6_upd_info","stg_ap_6_upd_info")
263{
264  TICK_ENT_DYN_THK();
265  STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(6));
266  UPD_BH_UPDATABLE();
267  LDV_ENTER(R1);
268  ENTER_CCS_THUNK(R1);
269  PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
270  W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,5);
271  W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,4);
272  W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,3);
273  W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,2);
274  W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,1);
275  R1 = StgThunk_payload(R1,0);
276  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(5);
277  Sp_adj(-1); // for stg_ap_*_ret
278  TICK_UNKNOWN_CALL();
279  TICK_SLOW_CALL_ppppp();
280  jump RET_LBL(stg_ap_ppppp);
281}
282
283INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
284{
285  TICK_ENT_DYN_THK();
286  STK_CHK_NP(SIZEOF_StgUpdateFrame+WDS(7));
287  UPD_BH_UPDATABLE();
288  LDV_ENTER(R1);
289  ENTER_CCS_THUNK(R1);
290  PUSH_UPD_FRAME(Sp-SIZEOF_StgUpdateFrame,R1);
291  W_[Sp-SIZEOF_StgUpdateFrame-WDS(1)] = StgThunk_payload(R1,6);
292  W_[Sp-SIZEOF_StgUpdateFrame-WDS(2)] = StgThunk_payload(R1,5);
293  W_[Sp-SIZEOF_StgUpdateFrame-WDS(3)] = StgThunk_payload(R1,4);
294  W_[Sp-SIZEOF_StgUpdateFrame-WDS(4)] = StgThunk_payload(R1,3);
295  W_[Sp-SIZEOF_StgUpdateFrame-WDS(5)] = StgThunk_payload(R1,2);
296  W_[Sp-SIZEOF_StgUpdateFrame-WDS(6)] = StgThunk_payload(R1,1);
297  R1 = StgThunk_payload(R1,0);
298  Sp = Sp - SIZEOF_StgUpdateFrame - WDS(6);
299  Sp_adj(-1); // for stg_ap_*_ret
300  TICK_UNKNOWN_CALL();
301  TICK_SLOW_CALL_pppppp();
302  jump RET_LBL(stg_ap_pppppp);
303}
Note: See TracBrowser for help on using the browser.