root/rts/sm/Storage.c

Revision 3457c6befa697e52dd71f7efa1940f010f792469, 34.6 KB (checked in by Ian Lynagh <igloo@…>, 4 weeks ago)

Fix maintenance of n_blocks in the RTS

It was causing assertion failures of

ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks)

at

ghc-stage2: internal error: ASSERTION FAILED: file rts/sm/Sanity.c, line 878

  • Property mode set to 100644
Line 
1/* -----------------------------------------------------------------------------
2 *
3 * (c) The GHC Team, 1998-2012
4 *
5 * Storage manager front end
6 *
7 * Documentation on the architecture of the Storage Manager can be
8 * found in the online commentary:
9 *
10 *   http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage
11 *
12 * ---------------------------------------------------------------------------*/
13
14#include "PosixSource.h"
15#include "Rts.h"
16
17#include "Storage.h"
18#include "GCThread.h"
19#include "RtsUtils.h"
20#include "Stats.h"
21#include "BlockAlloc.h"
22#include "Weak.h"
23#include "Sanity.h"
24#include "Arena.h"
25#include "Capability.h"
26#include "Schedule.h"
27#include "RetainerProfile.h"        // for counting memory blocks (memInventory)
28#include "OSMem.h"
29#include "Trace.h"
30#include "GC.h"
31#include "Evac.h"
32
33#include <string.h>
34
35#include "ffi.h"
36
37/*
38 * All these globals require sm_mutex to access in THREADED_RTS mode.
39 */
40StgClosure    *caf_list         = NULL;
41StgClosure    *revertible_caf_list = NULL;
42rtsBool       keepCAFs;
43
44nat large_alloc_lim;    /* GC if n_large_blocks in any nursery
45                         * reaches this. */
46
47bdescr *exec_block;
48
49generation *generations = NULL; /* all the generations */
50generation *g0          = NULL; /* generation 0, for convenience */
51generation *oldest_gen  = NULL; /* oldest generation, for convenience */
52
53nursery *nurseries = NULL;     /* array of nurseries, size == n_capabilities */
54
55#ifdef THREADED_RTS
56/*
57 * Storage manager mutex:  protects all the above state from
58 * simultaneous access by two STG threads.
59 */
60Mutex sm_mutex;
61#endif
62
63static void allocNurseries (nat from, nat to);
64
65static void
66initGeneration (generation *gen, int g)
67{
68    gen->no = g;
69    gen->collections = 0;
70    gen->par_collections = 0;
71    gen->failed_promotions = 0;
72    gen->max_blocks = 0;
73    gen->blocks = NULL;
74    gen->n_blocks = 0;
75    gen->n_words = 0;
76    gen->live_estimate = 0;
77    gen->old_blocks = NULL;
78    gen->n_old_blocks = 0;
79    gen->large_objects = NULL;
80    gen->n_large_blocks = 0;
81    gen->n_new_large_words = 0;
82    gen->scavenged_large_objects = NULL;
83    gen->n_scavenged_large_blocks = 0;
84    gen->mark = 0;
85    gen->compact = 0;
86    gen->bitmap = NULL;
87#ifdef THREADED_RTS
88    initSpinLock(&gen->sync);
89#endif
90    gen->threads = END_TSO_QUEUE;
91    gen->old_threads = END_TSO_QUEUE;
92}
93
94void
95initStorage (void)
96{
97  nat g;
98
99  if (generations != NULL) {
100      // multi-init protection
101      return;
102  }
103
104  initMBlocks();
105
106  /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be
107   * doing something reasonable.
108   */
109  /* We use the NOT_NULL variant or gcc warns that the test is always true */
110  ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info));
111  ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure));
112  ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure));
113 
114  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
115      RtsFlags.GcFlags.heapSizeSuggestion > 
116      RtsFlags.GcFlags.maxHeapSize) {
117      RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion;
118  }
119
120  if (RtsFlags.GcFlags.maxHeapSize != 0 &&
121      RtsFlags.GcFlags.minAllocAreaSize > 
122      RtsFlags.GcFlags.maxHeapSize) {
123      errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)");
124      RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize;
125  }
126
127  initBlockAllocator();
128 
129#if defined(THREADED_RTS)
130  initMutex(&sm_mutex);
131#endif
132
133  ACQUIRE_SM_LOCK;
134
135  /* allocate generation info array */
136  generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations
137                                             * sizeof(struct generation_),
138                                             "initStorage: gens");
139
140  /* Initialise all generations */
141  for(g = 0; g < RtsFlags.GcFlags.generations; g++) {
142      initGeneration(&generations[g], g);
143  }
144
145  /* A couple of convenience pointers */
146  g0 = &generations[0];
147  oldest_gen = &generations[RtsFlags.GcFlags.generations-1];
148
149  /* Set up the destination pointers in each younger gen. step */
150  for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) {
151      generations[g].to = &generations[g+1];
152  }
153  oldest_gen->to = oldest_gen;
154 
155  /* The oldest generation has one step. */
156  if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) {
157      if (RtsFlags.GcFlags.generations == 1) {
158          errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled");
159      } else {
160          oldest_gen->mark = 1;
161          if (RtsFlags.GcFlags.compact)
162              oldest_gen->compact = 1;
163      }
164  }
165
166  generations[0].max_blocks = 0;
167
168  weak_ptr_list = NULL;
169  caf_list = END_OF_STATIC_LIST;
170  revertible_caf_list = END_OF_STATIC_LIST;
171   
172  /* initialise the allocate() interface */
173  large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W;
174
175  exec_block = NULL;
176
177#ifdef THREADED_RTS
178  initSpinLock(&gc_alloc_block_sync);
179  whitehole_spin = 0;
180#endif
181
182  N = 0;
183
184  storageAddCapabilities(0, n_capabilities);
185
186  IF_DEBUG(gc, statDescribeGens());
187
188  RELEASE_SM_LOCK;
189
190  traceEventHeapInfo(CAPSET_HEAP_DEFAULT,
191                     RtsFlags.GcFlags.generations,
192                     RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE_W * sizeof(W_),
193                     RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W * sizeof(W_),
194                     MBLOCK_SIZE_W * sizeof(W_),
195                     BLOCK_SIZE_W  * sizeof(W_));
196}
197
198void storageAddCapabilities (nat from, nat to)
199{
200    nat n, g, i;
201
202    if (from > 0) {
203        nurseries = stgReallocBytes(nurseries, to * sizeof(struct nursery_),
204                                    "storageAddCapabilities");
205    } else {
206        nurseries = stgMallocBytes(to * sizeof(struct nursery_),
207                                   "storageAddCapabilities");
208    }
209
210    // we've moved the nurseries, so we have to update the rNursery
211    // pointers from the Capabilities.
212    for (i = 0; i < to; i++) {
213        capabilities[i].r.rNursery = &nurseries[i];
214    }
215
216    /* The allocation area.  Policy: keep the allocation area
217     * small to begin with, even if we have a large suggested heap
218     * size.  Reason: we're going to do a major collection first, and we
219     * don't want it to be a big one.  This vague idea is borne out by
220     * rigorous experimental evidence.
221     */
222    allocNurseries(from, to);
223
224    // allocate a block for each mut list
225    for (n = from; n < to; n++) {
226        for (g = 1; g < RtsFlags.GcFlags.generations; g++) {
227            capabilities[n].mut_lists[g] = allocBlock();
228        }
229    }
230
231    initGcThreads(from, to);
232}
233
234
235void
236exitStorage (void)
237{
238    lnat allocated = updateNurseriesStats();
239    stat_exit(allocated);
240}
241
242void
243freeStorage (rtsBool free_heap)
244{
245    stgFree(generations);
246    if (free_heap) freeAllMBlocks();
247#if defined(THREADED_RTS)
248    closeMutex(&sm_mutex);
249#endif
250    stgFree(nurseries);
251    freeGcThreads();
252}
253
254/* -----------------------------------------------------------------------------
255   CAF management.
256
257   The entry code for every CAF does the following:
258     
259      - builds a CAF_BLACKHOLE in the heap
260
261      - calls newCaf, which atomically updates the CAF with
262        IND_STATIC pointing to the CAF_BLACKHOLE
263
264      - if newCaf returns zero, it re-enters the CAF (see Note [atomic
265        CAF entry])
266
267      - pushes an update frame pointing to the CAF_BLACKHOLE
268
269   Why do we build an BLACKHOLE in the heap rather than just updating
270   the thunk directly?  It's so that we only need one kind of update
271   frame - otherwise we'd need a static version of the update frame
272   too, and various other parts of the RTS that deal with update
273   frames would also need special cases for static update frames.
274
275   newCaf() does the following:
276       
277      - it updates the CAF with an IND_STATIC pointing to the
278        CAF_BLACKHOLE, atomically.
279
280      - it puts the CAF on the oldest generation's mutable list.
281        This is so that we treat the CAF as a root when collecting
282        younger generations.
283
284   ------------------
285   Note [atomic CAF entry]
286
287   With THREADED_RTS, newCaf() is required to be atomic (see
288   #5558). This is because if two threads happened to enter the same
289   CAF simultaneously, they would create two distinct CAF_BLACKHOLEs,
290   and so the normal threadPaused() machinery for detecting duplicate
291   evaluation will not detect this.  Hence in lockCAF() below, we
292   atomically lock the CAF with WHITEHOLE before updating it with
293   IND_STATIC, and return zero if another thread locked the CAF first.
294   In the event that we lost the race, CAF entry code will re-enter
295   the CAF and block on the other thread's CAF_BLACKHOLE.
296
297   ------------------
298   Note [GHCi CAFs]
299
300   For GHCI, we have additional requirements when dealing with CAFs:
301
302      - we must *retain* all dynamically-loaded CAFs ever entered,
303        just in case we need them again.
304      - we must be able to *revert* CAFs that have been evaluated, to
305        their pre-evaluated form.
306
307      To do this, we use an additional CAF list.  When newCaf() is
308      called on a dynamically-loaded CAF, we add it to the CAF list
309      instead of the old-generation mutable list, and save away its
310      old info pointer (in caf->saved_info) for later reversion.
311
312      To revert all the CAFs, we traverse the CAF list and reset the
313      info pointer to caf->saved_info, then throw away the CAF list.
314      (see GC.c:revertCAFs()).
315
316      -- SDM 29/1/01
317
318   -------------------------------------------------------------------------- */
319
320STATIC_INLINE StgWord lockCAF (StgClosure *caf, StgClosure *bh)
321{
322    const StgInfoTable *orig_info;
323
324    orig_info = caf->header.info;
325
326#ifdef THREADED_RTS
327    const StgInfoTable *cur_info;
328
329    if (orig_info == &stg_IND_STATIC_info ||
330        orig_info == &stg_WHITEHOLE_info) {
331        // already claimed by another thread; re-enter the CAF
332        return 0;
333    }
334
335    cur_info = (const StgInfoTable *)
336        cas((StgVolatilePtr)&caf->header.info,
337            (StgWord)orig_info,
338            (StgWord)&stg_WHITEHOLE_info);
339
340    if (cur_info != orig_info) {
341        // already claimed by another thread; re-enter the CAF
342        return 0;
343    }
344
345    // successfully claimed by us; overwrite with IND_STATIC
346#endif
347
348    // For the benefit of revertCAFs(), save the original info pointer
349    ((StgIndStatic *)caf)->saved_info  = orig_info;
350
351    ((StgIndStatic*)caf)->indirectee = bh;
352    write_barrier();
353    SET_INFO(caf,&stg_IND_STATIC_info);
354
355    return 1;
356}
357
358StgWord
359newCAF(StgRegTable *reg, StgClosure *caf, StgClosure *bh)
360{
361    if (lockCAF(caf,bh) == 0) return 0;
362
363    if(keepCAFs)
364    {
365        // HACK:
366        // If we are in GHCi _and_ we are using dynamic libraries,
367        // then we can't redirect newCAF calls to newDynCAF (see below),
368        // so we make newCAF behave almost like newDynCAF.
369        // The dynamic libraries might be used by both the interpreted
370        // program and GHCi itself, so they must not be reverted.
371        // This also means that in GHCi with dynamic libraries, CAFs are not
372        // garbage collected. If this turns out to be a problem, we could
373        // do another hack here and do an address range test on caf to figure
374        // out whether it is from a dynamic library.
375
376        ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex
377        ((StgIndStatic *)caf)->static_link = caf_list;
378        caf_list = caf;
379        RELEASE_SM_LOCK;
380    }
381    else
382    {
383        // Put this CAF on the mutable list for the old generation.
384        ((StgIndStatic *)caf)->saved_info = NULL;
385        if (oldest_gen->no != 0) {
386            recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no);
387        }
388    }
389    return 1;
390}
391
392// External API for setting the keepCAFs flag. see #3900.
393void
394setKeepCAFs (void)
395{
396    keepCAFs = 1;
397}
398
399// An alternate version of newCaf which is used for dynamically loaded
400// object code in GHCi.  In this case we want to retain *all* CAFs in
401// the object code, because they might be demanded at any time from an
402// expression evaluated on the command line.
403// Also, GHCi might want to revert CAFs, so we add these to the
404// revertible_caf_list.
405//
406// The linker hackily arranges that references to newCaf from dynamic
407// code end up pointing to newDynCAF.
408StgWord
409newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf, StgClosure *bh)
410{
411    if (lockCAF(caf,bh) == 0) return 0;
412
413    ACQUIRE_SM_LOCK;
414
415    ((StgIndStatic *)caf)->static_link = revertible_caf_list;
416    revertible_caf_list = caf;
417
418    RELEASE_SM_LOCK;
419
420    return 1;
421}
422
423/* -----------------------------------------------------------------------------
424   Nursery management.
425   -------------------------------------------------------------------------- */
426
427static bdescr *
428allocNursery (bdescr *tail, nat blocks)
429{
430    bdescr *bd = NULL;
431    nat i, n;
432
433    // We allocate the nursery as a single contiguous block and then
434    // divide it into single blocks manually.  This way we guarantee
435    // that the nursery blocks are adjacent, so that the processor's
436    // automatic prefetching works across nursery blocks.  This is a
437    // tiny optimisation (~0.5%), but it's free.
438
439    while (blocks > 0) {
440        n = stg_min(blocks, BLOCKS_PER_MBLOCK);
441        blocks -= n;
442
443        bd = allocGroup(n);
444        for (i = 0; i < n; i++) {
445            initBdescr(&bd[i], g0, g0);
446
447            bd[i].blocks = 1;
448            bd[i].flags = 0;
449
450            if (i > 0) {
451                bd[i].u.back = &bd[i-1];
452            } else {
453                bd[i].u.back = NULL;
454            }
455
456            if (i+1 < n) {
457                bd[i].link = &bd[i+1];
458            } else {
459                bd[i].link = tail;
460                if (tail != NULL) {
461                    tail->u.back = &bd[i];
462                }
463            }
464
465            bd[i].free = bd[i].start;
466        }
467
468        tail = &bd[0];
469    }
470
471    return &bd[0];
472}
473
474static void
475assignNurseriesToCapabilities (nat from, nat to)
476{
477    nat i;
478
479    for (i = from; i < to; i++) {
480        capabilities[i].r.rCurrentNursery = nurseries[i].blocks;
481        capabilities[i].r.rCurrentAlloc   = NULL;
482    }
483}
484
485static void
486allocNurseries (nat from, nat to)
487{ 
488    nat i;
489
490    for (i = from; i < to; i++) {
491        nurseries[i].blocks =
492            allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize);
493        nurseries[i].n_blocks =
494            RtsFlags.GcFlags.minAllocAreaSize;
495    }
496    assignNurseriesToCapabilities(from, to);
497}
498     
499lnat // words allocated
500clearNurseries (void)
501{
502    lnat allocated = 0;
503    nat i;
504    bdescr *bd;
505
506    for (i = 0; i < n_capabilities; i++) {
507        for (bd = nurseries[i].blocks; bd; bd = bd->link) {
508            allocated                       += (lnat)(bd->free - bd->start);
509            capabilities[i].total_allocated += (lnat)(bd->free - bd->start);
510            bd->free = bd->start;
511            ASSERT(bd->gen_no == 0);
512            ASSERT(bd->gen == g0);
513            IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE));
514        }
515    }
516
517    return allocated;
518}
519
520void
521resetNurseries (void)
522{
523    assignNurseriesToCapabilities(0, n_capabilities);
524}
525
526lnat
527countNurseryBlocks (void)
528{
529    nat i;
530    lnat blocks = 0;
531
532    for (i = 0; i < n_capabilities; i++) {
533        blocks += nurseries[i].n_blocks;
534    }
535    return blocks;
536}
537
538static void
539resizeNursery (nursery *nursery, nat blocks)
540{
541  bdescr *bd;
542  nat nursery_blocks;
543
544  nursery_blocks = nursery->n_blocks;
545  if (nursery_blocks == blocks) return;
546
547  if (nursery_blocks < blocks) {
548      debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks", 
549                 blocks);
550    nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
551  } 
552  else {
553    bdescr *next_bd;
554   
555    debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks", 
556               blocks);
557
558    bd = nursery->blocks;
559    while (nursery_blocks > blocks) {
560        next_bd = bd->link;
561        next_bd->u.back = NULL;
562        nursery_blocks -= bd->blocks; // might be a large block
563        freeGroup(bd);
564        bd = next_bd;
565    }
566    nursery->blocks = bd;
567    // might have gone just under, by freeing a large block, so make
568    // up the difference.
569    if (nursery_blocks < blocks) {
570        nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks);
571    }
572  }
573 
574  nursery->n_blocks = blocks;
575  ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks);
576}
577
578//
579// Resize each of the nurseries to the specified size.
580//
581void
582resizeNurseriesFixed (nat blocks)
583{
584    nat i;
585    for (i = 0; i < n_capabilities; i++) {
586        resizeNursery(&nurseries[i], blocks);
587    }
588}
589
590//
591// Resize the nurseries to the total specified size.
592//
593void
594resizeNurseries (nat blocks)
595{
596    // If there are multiple nurseries, then we just divide the number
597    // of available blocks between them.
598    resizeNurseriesFixed(blocks / n_capabilities);
599}
600
601
602/* -----------------------------------------------------------------------------
603   move_STACK is called to update the TSO structure after it has been
604   moved from one place to another.
605   -------------------------------------------------------------------------- */
606
607void
608move_STACK (StgStack *src, StgStack *dest)
609{
610    ptrdiff_t diff;
611
612    // relocate the stack pointer...
613    diff = (StgPtr)dest - (StgPtr)src; // In *words*
614    dest->sp = (StgPtr)dest->sp + diff;
615}
616
617/* -----------------------------------------------------------------------------
618   allocate()
619
620   This allocates memory in the current thread - it is intended for
621   use primarily from STG-land where we have a Capability.  It is
622   better than allocate() because it doesn't require taking the
623   sm_mutex lock in the common case.
624
625   Memory is allocated directly from the nursery if possible (but not
626   from the current nursery block, so as not to interfere with
627   Hp/HpLim).
628   -------------------------------------------------------------------------- */
629
630StgPtr
631allocate (Capability *cap, lnat n)
632{
633    bdescr *bd;
634    StgPtr p;
635
636    TICK_ALLOC_HEAP_NOCTR(n);
637    CCS_ALLOC(cap->r.rCCCS,n);
638   
639    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
640        lnat req_blocks =  (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE;
641
642        // Attempting to allocate an object larger than maxHeapSize
643        // should definitely be disallowed.  (bug #1791)
644        if ((RtsFlags.GcFlags.maxHeapSize > 0 &&
645             req_blocks >= RtsFlags.GcFlags.maxHeapSize) ||
646            req_blocks >= HS_INT32_MAX)   // avoid overflow when
647                                          // calling allocGroup() below
648        {
649            heapOverflow();
650            // heapOverflow() doesn't exit (see #2592), but we aren't
651            // in a position to do a clean shutdown here: we
652            // either have to allocate the memory or exit now.
653            // Allocating the memory would be bad, because the user
654            // has requested that we not exceed maxHeapSize, so we
655            // just exit.
656            stg_exit(EXIT_HEAPOVERFLOW);
657        }
658
659        ACQUIRE_SM_LOCK
660        bd = allocGroup(req_blocks);
661        dbl_link_onto(bd, &g0->large_objects);
662        g0->n_large_blocks += bd->blocks; // might be larger than req_blocks
663        g0->n_new_large_words += n;
664        RELEASE_SM_LOCK;
665        initBdescr(bd, g0, g0);
666        bd->flags = BF_LARGE;
667        bd->free = bd->start + n;
668        cap->total_allocated += n;
669        return bd->start;
670    }
671
672    /* small allocation (<LARGE_OBJECT_THRESHOLD) */
673
674    bd = cap->r.rCurrentAlloc;
675    if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
676       
677        // The CurrentAlloc block is full, we need to find another
678        // one.  First, we try taking the next block from the
679        // nursery:
680        bd = cap->r.rCurrentNursery->link;
681       
682        if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) {
683            // The nursery is empty, or the next block is already
684            // full: allocate a fresh block (we can't fail here).
685            ACQUIRE_SM_LOCK;
686            bd = allocBlock();
687            cap->r.rNursery->n_blocks++;
688            RELEASE_SM_LOCK;
689            initBdescr(bd, g0, g0);
690            bd->flags = 0;
691            // If we had to allocate a new block, then we'll GC
692            // pretty quickly now, because MAYBE_GC() will
693            // notice that CurrentNursery->link is NULL.
694        } else {
695            // we have a block in the nursery: take it and put
696            // it at the *front* of the nursery list, and use it
697            // to allocate() from.
698            cap->r.rCurrentNursery->link = bd->link;
699            if (bd->link != NULL) {
700                bd->link->u.back = cap->r.rCurrentNursery;
701            }
702        }
703        dbl_link_onto(bd, &cap->r.rNursery->blocks);
704        cap->r.rCurrentAlloc = bd;
705        IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery));
706    }
707    p = bd->free;
708    bd->free += n;
709
710    IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa));
711    return p;
712}
713
714/* ---------------------------------------------------------------------------
715   Allocate a fixed/pinned object.
716
717   We allocate small pinned objects into a single block, allocating a
718   new block when the current one overflows.  The block is chained
719   onto the large_object_list of generation 0.
720
721   NOTE: The GC can't in general handle pinned objects.  This
722   interface is only safe to use for ByteArrays, which have no
723   pointers and don't require scavenging.  It works because the
724   block's descriptor has the BF_LARGE flag set, so the block is
725   treated as a large object and chained onto various lists, rather
726   than the individual objects being copied.  However, when it comes
727   to scavenge the block, the GC will only scavenge the first object.
728   The reason is that the GC can't linearly scan a block of pinned
729   objects at the moment (doing so would require using the
730   mostly-copying techniques).  But since we're restricting ourselves
731   to pinned ByteArrays, not scavenging is ok.
732
733   This function is called by newPinnedByteArray# which immediately
734   fills the allocated memory with a MutableByteArray#.
735   ------------------------------------------------------------------------- */
736
737StgPtr
738allocatePinned (Capability *cap, lnat n)
739{
740    StgPtr p;
741    bdescr *bd;
742
743    // If the request is for a large object, then allocate()
744    // will give us a pinned object anyway.
745    if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) {
746        p = allocate(cap, n);
747        Bdescr(p)->flags |= BF_PINNED;
748        return p;
749    }
750
751    TICK_ALLOC_HEAP_NOCTR(n);
752    CCS_ALLOC(cap->r.rCCCS,n);
753
754    bd = cap->pinned_object_block;
755   
756    // If we don't have a block of pinned objects yet, or the current
757    // one isn't large enough to hold the new object, get a new one.
758    if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) {
759
760        // stash the old block on cap->pinned_object_blocks.  On the
761        // next GC cycle these objects will be moved to
762        // g0->large_objects.
763        if (bd != NULL) {
764            dbl_link_onto(bd, &cap->pinned_object_blocks);
765        }
766
767        // We need to find another block.  We could just allocate one,
768        // but that means taking a global lock and we really want to
769        // avoid that (benchmarks that allocate a lot of pinned
770        // objects scale really badly if we do this).
771        //
772        // So first, we try taking the next block from the nursery, in
773        // the same way as allocate(), but note that we can only take
774        // an *empty* block, because we're about to mark it as
775        // BF_PINNED | BF_LARGE.
776        bd = cap->r.rCurrentNursery->link;
777        if (bd == NULL || bd->free != bd->start) { // must be empty!
778            // The nursery is empty, or the next block is non-empty:
779            // allocate a fresh block (we can't fail here).
780
781            // XXX in the case when the next nursery block is
782            // non-empty we aren't exerting any pressure to GC soon,
783            // so if this case ever happens then we could in theory
784            // keep allocating for ever without calling the GC. We
785            // can't bump g0->n_new_large_words because that will be
786            // counted towards allocation, and we're already counting
787            // our pinned obects as allocation in
788            // collect_pinned_object_blocks in the GC.
789            ACQUIRE_SM_LOCK;
790            bd = allocBlock();
791            RELEASE_SM_LOCK;
792            initBdescr(bd, g0, g0);
793        } else {
794            // we have a block in the nursery: steal it
795            cap->r.rCurrentNursery->link = bd->link;
796            if (bd->link != NULL) {
797                bd->link->u.back = cap->r.rCurrentNursery;
798            }
799            cap->r.rNursery->n_blocks -= bd->blocks;
800        }
801
802        cap->pinned_object_block = bd;
803        bd->flags  = BF_PINNED | BF_LARGE | BF_EVACUATED;
804
805        // The pinned_object_block remains attached to the capability
806        // until it is full, even if a GC occurs.  We want this
807        // behaviour because otherwise the unallocated portion of the
808        // block would be forever slop, and under certain workloads
809        // (allocating a few ByteStrings per GC) we accumulate a lot
810        // of slop.
811        //
812        // So, the pinned_object_block is initially marked
813        // BF_EVACUATED so the GC won't touch it.  When it is full,
814        // we place it on the large_objects list, and at the start of
815        // the next GC the BF_EVACUATED flag will be cleared, and the
816        // block will be promoted as usual (if anything in it is
817        // live).
818    }
819
820    p = bd->free;
821    bd->free += n;
822    return p;
823}
824
825/* -----------------------------------------------------------------------------
826   Write Barriers
827   -------------------------------------------------------------------------- */
828
829/*
830   This is the write barrier for MUT_VARs, a.k.a. IORefs.  A
831   MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY
832   is.  When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY
833   and is put on the mutable list.
834*/
835void
836dirty_MUT_VAR(StgRegTable *reg, StgClosure *p)
837{
838    Capability *cap = regTableToCapability(reg);
839    if (p->header.info == &stg_MUT_VAR_CLEAN_info) {
840        p->header.info = &stg_MUT_VAR_DIRTY_info;
841        recordClosureMutated(cap,p);
842    }
843}
844
845// Setting a TSO's link field with a write barrier.
846// It is *not* necessary to call this function when
847//    * setting the link field to END_TSO_QUEUE
848//    * putting a TSO on the blackhole_queue
849//    * setting the link field of the currently running TSO, as it
850//      will already be dirty.
851void
852setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target)
853{
854    if (tso->dirty == 0) {
855        tso->dirty = 1;
856        recordClosureMutated(cap,(StgClosure*)tso);
857    }
858    tso->_link = target;
859}
860
861void
862setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target)
863{
864    if (tso->dirty == 0) {
865        tso->dirty = 1;
866        recordClosureMutated(cap,(StgClosure*)tso);
867    }
868    tso->block_info.prev = target;
869}
870
871void
872dirty_TSO (Capability *cap, StgTSO *tso)
873{
874    if (tso->dirty == 0) {
875        tso->dirty = 1;
876        recordClosureMutated(cap,(StgClosure*)tso);
877    }
878}
879
880void
881dirty_STACK (Capability *cap, StgStack *stack)
882{
883    if (stack->dirty == 0) {
884        stack->dirty = 1;
885        recordClosureMutated(cap,(StgClosure*)stack);
886    }
887}
888
889/*
890   This is the write barrier for MVARs.  An MVAR_CLEAN objects is not
891   on the mutable list; a MVAR_DIRTY is.  When written to, a
892   MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list.
893   The check for MVAR_CLEAN is inlined at the call site for speed,
894   this really does make a difference on concurrency-heavy benchmarks
895   such as Chaneneos and cheap-concurrency.
896*/
897void
898dirty_MVAR(StgRegTable *reg, StgClosure *p)
899{
900    recordClosureMutated(regTableToCapability(reg),p);
901}
902
903/* -----------------------------------------------------------------------------
904 * Stats and stuff
905 * -------------------------------------------------------------------------- */
906
907/* -----------------------------------------------------------------------------
908 * updateNurseriesStats()
909 *
910 * Update the per-cap total_allocated numbers with an approximation of
911 * the amount of memory used in each cap's nursery. Also return the
912 * total across all caps.
913 *
914 * Since this update is also performed by clearNurseries() then we only
915 * need this function for the final stats when the RTS is shutting down.
916 * -------------------------------------------------------------------------- */
917
918lnat
919updateNurseriesStats (void)
920{
921    lnat allocated = 0;
922    nat i;
923
924    for (i = 0; i < n_capabilities; i++) {
925        int cap_allocated = countOccupied(nurseries[i].blocks);
926        capabilities[i].total_allocated += cap_allocated;
927        allocated                       += cap_allocated;
928    }
929
930    return allocated;
931}
932
933lnat
934countLargeAllocated (void)
935{
936    return g0->n_new_large_words;
937}
938
939lnat countOccupied (bdescr *bd)
940{
941    lnat words;
942
943    words = 0;
944    for (; bd != NULL; bd = bd->link) {
945        ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W);
946        words += bd->free - bd->start;
947    }
948    return words;
949}
950
951lnat genLiveWords (generation *gen)
952{
953    return gen->n_words + countOccupied(gen->large_objects);
954}
955
956lnat genLiveBlocks (generation *gen)
957{
958    return gen->n_blocks + gen->n_large_blocks;
959}
960
961lnat gcThreadLiveWords (nat i, nat g)
962{
963    lnat words;
964
965    words   = countOccupied(gc_threads[i]->gens[g].todo_bd);
966    words  += countOccupied(gc_threads[i]->gens[g].part_list);
967    words  += countOccupied(gc_threads[i]->gens[g].scavd_list);
968
969    return words;
970}
971
972lnat gcThreadLiveBlocks (nat i, nat g)
973{
974    lnat blocks;
975
976    blocks  = countBlocks(gc_threads[i]->gens[g].todo_bd);
977    blocks += gc_threads[i]->gens[g].n_part_blocks;
978    blocks += gc_threads[i]->gens[g].n_scavd_blocks;
979
980    return blocks;
981}
982
983// Return an accurate count of the live data in the heap, excluding
984// generation 0.
985lnat calcLiveWords (void)
986{
987    nat g;
988    lnat live;
989
990    live = 0;
991    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
992        live += genLiveWords(&generations[g]);
993    }
994    return live;
995}
996
997lnat calcLiveBlocks (void)
998{
999    nat g;
1000    lnat live;
1001
1002    live = 0;
1003    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1004        live += genLiveBlocks(&generations[g]);
1005    }
1006    return live;
1007}
1008
1009/* Approximate the number of blocks that will be needed at the next
1010 * garbage collection.
1011 *
1012 * Assume: all data currently live will remain live.  Generationss
1013 * that will be collected next time will therefore need twice as many
1014 * blocks since all the data will be copied.
1015 */
1016extern lnat
1017calcNeeded(void)
1018{
1019    lnat needed = 0;
1020    nat g;
1021    generation *gen;
1022   
1023    for (g = 0; g < RtsFlags.GcFlags.generations; g++) {
1024        gen = &generations[g];
1025
1026        // we need at least this much space
1027        needed += gen->n_blocks + gen->n_large_blocks;
1028       
1029        // any additional space needed to collect this gen next time?
1030        if (g == 0 || // always collect gen 0
1031            (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) {
1032            // we will collect this gen next time
1033            if (gen->mark) {
1034                //  bitmap:
1035                needed += gen->n_blocks / BITS_IN(W_);
1036                //  mark stack:
1037                needed += gen->n_blocks / 100;
1038            }
1039            if (gen->compact) {
1040                continue; // no additional space needed for compaction
1041            } else {
1042                needed += gen->n_blocks;
1043            }
1044        }
1045    }
1046    return needed;
1047}
1048
1049/* ----------------------------------------------------------------------------
1050   Executable memory
1051
1052   Executable memory must be managed separately from non-executable
1053   memory.  Most OSs these days require you to jump through hoops to
1054   dynamically allocate executable memory, due to various security
1055   measures.
1056
1057   Here we provide a small memory allocator for executable memory.
1058   Memory is managed with a page granularity; we allocate linearly
1059   in the page, and when the page is emptied (all objects on the page
1060   are free) we free the page again, not forgetting to make it
1061   non-executable.
1062
1063   TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that
1064         the linker cannot use allocateExec for loading object code files
1065         on Windows. Once allocateExec can handle larger objects, the linker
1066         should be modified to use allocateExec instead of VirtualAlloc.
1067   ------------------------------------------------------------------------- */
1068
1069#if defined(linux_HOST_OS)
1070
1071// On Linux we need to use libffi for allocating executable memory,
1072// because it knows how to work around the restrictions put in place
1073// by SELinux.
1074
1075void *allocateExec (nat bytes, void **exec_ret)
1076{
1077    void **ret, **exec;
1078    ACQUIRE_SM_LOCK;
1079    ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec);
1080    RELEASE_SM_LOCK;
1081    if (ret == NULL) return ret;
1082    *ret = ret; // save the address of the writable mapping, for freeExec().
1083    *exec_ret = exec + 1;
1084    return (ret + 1);
1085}
1086
1087// freeExec gets passed the executable address, not the writable address.
1088void freeExec (void *addr)
1089{
1090    void *writable;
1091    writable = *((void**)addr - 1);
1092    ACQUIRE_SM_LOCK;
1093    ffi_closure_free (writable);
1094    RELEASE_SM_LOCK
1095}
1096
1097#else
1098
1099void *allocateExec (nat bytes, void **exec_ret)
1100{
1101    void *ret;
1102    nat n;
1103
1104    ACQUIRE_SM_LOCK;
1105
1106    // round up to words.
1107    n  = (bytes + sizeof(W_) + 1) / sizeof(W_);
1108
1109    if (n+1 > BLOCK_SIZE_W) {
1110        barf("allocateExec: can't handle large objects");
1111    }
1112
1113    if (exec_block == NULL || 
1114        exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
1115        bdescr *bd;
1116        lnat pagesize = getPageSize();
1117        bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE));
1118        debugTrace(DEBUG_gc, "allocate exec block %p", bd->start);
1119        bd->gen_no = 0;
1120        bd->flags = BF_EXEC;
1121        bd->link = exec_block;
1122        if (exec_block != NULL) {
1123            exec_block->u.back = bd;
1124        }
1125        bd->u.back = NULL;
1126        setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue);
1127        exec_block = bd;
1128    }
1129    *(exec_block->free) = n;  // store the size of this chunk
1130    exec_block->gen_no += n;  // gen_no stores the number of words allocated
1131    ret = exec_block->free + 1;
1132    exec_block->free += n + 1;
1133
1134    RELEASE_SM_LOCK
1135    *exec_ret = ret;
1136    return ret;
1137}
1138
1139void freeExec (void *addr)
1140{
1141    StgPtr p = (StgPtr)addr - 1;
1142    bdescr *bd = Bdescr((StgPtr)p);
1143
1144    if ((bd->flags & BF_EXEC) == 0) {
1145        barf("freeExec: not executable");
1146    }
1147
1148    if (*(StgPtr)p == 0) {
1149        barf("freeExec: already free?");
1150    }
1151
1152    ACQUIRE_SM_LOCK;
1153
1154    bd->gen_no -= *(StgPtr)p;
1155    *(StgPtr)p = 0;
1156
1157    if (bd->gen_no == 0) {
1158        // Free the block if it is empty, but not if it is the block at
1159        // the head of the queue.
1160        if (bd != exec_block) {
1161            debugTrace(DEBUG_gc, "free exec block %p", bd->start);
1162            dbl_link_remove(bd, &exec_block);
1163            setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse);
1164            freeGroup(bd);
1165        } else {
1166            bd->free = bd->start;
1167        }
1168    }
1169
1170    RELEASE_SM_LOCK
1171}   
1172
1173#endif /* mingw32_HOST_OS */
1174
1175#ifdef DEBUG
1176
1177// handy function for use in gdb, because Bdescr() is inlined.
1178extern bdescr *_bdescr (StgPtr p);
1179
1180bdescr *
1181_bdescr (StgPtr p)
1182{
1183    return Bdescr(p);
1184}
1185
1186#endif
Note: See TracBrowser for help on using the browser.