| 1 | /* ----------------------------------------------------------------------------- |
|---|
| 2 | * |
|---|
| 3 | * (c) The GHC Team, 1998-2012 |
|---|
| 4 | * |
|---|
| 5 | * Storage manager front end |
|---|
| 6 | * |
|---|
| 7 | * Documentation on the architecture of the Storage Manager can be |
|---|
| 8 | * found in the online commentary: |
|---|
| 9 | * |
|---|
| 10 | * http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/Storage |
|---|
| 11 | * |
|---|
| 12 | * ---------------------------------------------------------------------------*/ |
|---|
| 13 | |
|---|
| 14 | #include "PosixSource.h" |
|---|
| 15 | #include "Rts.h" |
|---|
| 16 | |
|---|
| 17 | #include "Storage.h" |
|---|
| 18 | #include "GCThread.h" |
|---|
| 19 | #include "RtsUtils.h" |
|---|
| 20 | #include "Stats.h" |
|---|
| 21 | #include "BlockAlloc.h" |
|---|
| 22 | #include "Weak.h" |
|---|
| 23 | #include "Sanity.h" |
|---|
| 24 | #include "Arena.h" |
|---|
| 25 | #include "Capability.h" |
|---|
| 26 | #include "Schedule.h" |
|---|
| 27 | #include "RetainerProfile.h" // for counting memory blocks (memInventory) |
|---|
| 28 | #include "OSMem.h" |
|---|
| 29 | #include "Trace.h" |
|---|
| 30 | #include "GC.h" |
|---|
| 31 | #include "Evac.h" |
|---|
| 32 | |
|---|
| 33 | #include <string.h> |
|---|
| 34 | |
|---|
| 35 | #include "ffi.h" |
|---|
| 36 | |
|---|
| 37 | /* |
|---|
| 38 | * All these globals require sm_mutex to access in THREADED_RTS mode. |
|---|
| 39 | */ |
|---|
| 40 | StgClosure *caf_list = NULL; |
|---|
| 41 | StgClosure *revertible_caf_list = NULL; |
|---|
| 42 | rtsBool keepCAFs; |
|---|
| 43 | |
|---|
| 44 | nat large_alloc_lim; /* GC if n_large_blocks in any nursery |
|---|
| 45 | * reaches this. */ |
|---|
| 46 | |
|---|
| 47 | bdescr *exec_block; |
|---|
| 48 | |
|---|
| 49 | generation *generations = NULL; /* all the generations */ |
|---|
| 50 | generation *g0 = NULL; /* generation 0, for convenience */ |
|---|
| 51 | generation *oldest_gen = NULL; /* oldest generation, for convenience */ |
|---|
| 52 | |
|---|
| 53 | nursery *nurseries = NULL; /* array of nurseries, size == n_capabilities */ |
|---|
| 54 | |
|---|
| 55 | #ifdef THREADED_RTS |
|---|
| 56 | /* |
|---|
| 57 | * Storage manager mutex: protects all the above state from |
|---|
| 58 | * simultaneous access by two STG threads. |
|---|
| 59 | */ |
|---|
| 60 | Mutex sm_mutex; |
|---|
| 61 | #endif |
|---|
| 62 | |
|---|
| 63 | static void allocNurseries (nat from, nat to); |
|---|
| 64 | |
|---|
| 65 | static void |
|---|
| 66 | initGeneration (generation *gen, int g) |
|---|
| 67 | { |
|---|
| 68 | gen->no = g; |
|---|
| 69 | gen->collections = 0; |
|---|
| 70 | gen->par_collections = 0; |
|---|
| 71 | gen->failed_promotions = 0; |
|---|
| 72 | gen->max_blocks = 0; |
|---|
| 73 | gen->blocks = NULL; |
|---|
| 74 | gen->n_blocks = 0; |
|---|
| 75 | gen->n_words = 0; |
|---|
| 76 | gen->live_estimate = 0; |
|---|
| 77 | gen->old_blocks = NULL; |
|---|
| 78 | gen->n_old_blocks = 0; |
|---|
| 79 | gen->large_objects = NULL; |
|---|
| 80 | gen->n_large_blocks = 0; |
|---|
| 81 | gen->n_new_large_words = 0; |
|---|
| 82 | gen->scavenged_large_objects = NULL; |
|---|
| 83 | gen->n_scavenged_large_blocks = 0; |
|---|
| 84 | gen->mark = 0; |
|---|
| 85 | gen->compact = 0; |
|---|
| 86 | gen->bitmap = NULL; |
|---|
| 87 | #ifdef THREADED_RTS |
|---|
| 88 | initSpinLock(&gen->sync); |
|---|
| 89 | #endif |
|---|
| 90 | gen->threads = END_TSO_QUEUE; |
|---|
| 91 | gen->old_threads = END_TSO_QUEUE; |
|---|
| 92 | } |
|---|
| 93 | |
|---|
| 94 | void |
|---|
| 95 | initStorage (void) |
|---|
| 96 | { |
|---|
| 97 | nat g; |
|---|
| 98 | |
|---|
| 99 | if (generations != NULL) { |
|---|
| 100 | // multi-init protection |
|---|
| 101 | return; |
|---|
| 102 | } |
|---|
| 103 | |
|---|
| 104 | initMBlocks(); |
|---|
| 105 | |
|---|
| 106 | /* Sanity check to make sure the LOOKS_LIKE_ macros appear to be |
|---|
| 107 | * doing something reasonable. |
|---|
| 108 | */ |
|---|
| 109 | /* We use the NOT_NULL variant or gcc warns that the test is always true */ |
|---|
| 110 | ASSERT(LOOKS_LIKE_INFO_PTR_NOT_NULL((StgWord)&stg_BLOCKING_QUEUE_CLEAN_info)); |
|---|
| 111 | ASSERT(LOOKS_LIKE_CLOSURE_PTR(&stg_dummy_ret_closure)); |
|---|
| 112 | ASSERT(!HEAP_ALLOCED(&stg_dummy_ret_closure)); |
|---|
| 113 | |
|---|
| 114 | if (RtsFlags.GcFlags.maxHeapSize != 0 && |
|---|
| 115 | RtsFlags.GcFlags.heapSizeSuggestion > |
|---|
| 116 | RtsFlags.GcFlags.maxHeapSize) { |
|---|
| 117 | RtsFlags.GcFlags.maxHeapSize = RtsFlags.GcFlags.heapSizeSuggestion; |
|---|
| 118 | } |
|---|
| 119 | |
|---|
| 120 | if (RtsFlags.GcFlags.maxHeapSize != 0 && |
|---|
| 121 | RtsFlags.GcFlags.minAllocAreaSize > |
|---|
| 122 | RtsFlags.GcFlags.maxHeapSize) { |
|---|
| 123 | errorBelch("maximum heap size (-M) is smaller than minimum alloc area size (-A)"); |
|---|
| 124 | RtsFlags.GcFlags.minAllocAreaSize = RtsFlags.GcFlags.maxHeapSize; |
|---|
| 125 | } |
|---|
| 126 | |
|---|
| 127 | initBlockAllocator(); |
|---|
| 128 | |
|---|
| 129 | #if defined(THREADED_RTS) |
|---|
| 130 | initMutex(&sm_mutex); |
|---|
| 131 | #endif |
|---|
| 132 | |
|---|
| 133 | ACQUIRE_SM_LOCK; |
|---|
| 134 | |
|---|
| 135 | /* allocate generation info array */ |
|---|
| 136 | generations = (generation *)stgMallocBytes(RtsFlags.GcFlags.generations |
|---|
| 137 | * sizeof(struct generation_), |
|---|
| 138 | "initStorage: gens"); |
|---|
| 139 | |
|---|
| 140 | /* Initialise all generations */ |
|---|
| 141 | for(g = 0; g < RtsFlags.GcFlags.generations; g++) { |
|---|
| 142 | initGeneration(&generations[g], g); |
|---|
| 143 | } |
|---|
| 144 | |
|---|
| 145 | /* A couple of convenience pointers */ |
|---|
| 146 | g0 = &generations[0]; |
|---|
| 147 | oldest_gen = &generations[RtsFlags.GcFlags.generations-1]; |
|---|
| 148 | |
|---|
| 149 | /* Set up the destination pointers in each younger gen. step */ |
|---|
| 150 | for (g = 0; g < RtsFlags.GcFlags.generations-1; g++) { |
|---|
| 151 | generations[g].to = &generations[g+1]; |
|---|
| 152 | } |
|---|
| 153 | oldest_gen->to = oldest_gen; |
|---|
| 154 | |
|---|
| 155 | /* The oldest generation has one step. */ |
|---|
| 156 | if (RtsFlags.GcFlags.compact || RtsFlags.GcFlags.sweep) { |
|---|
| 157 | if (RtsFlags.GcFlags.generations == 1) { |
|---|
| 158 | errorBelch("WARNING: compact/sweep is incompatible with -G1; disabled"); |
|---|
| 159 | } else { |
|---|
| 160 | oldest_gen->mark = 1; |
|---|
| 161 | if (RtsFlags.GcFlags.compact) |
|---|
| 162 | oldest_gen->compact = 1; |
|---|
| 163 | } |
|---|
| 164 | } |
|---|
| 165 | |
|---|
| 166 | generations[0].max_blocks = 0; |
|---|
| 167 | |
|---|
| 168 | weak_ptr_list = NULL; |
|---|
| 169 | caf_list = END_OF_STATIC_LIST; |
|---|
| 170 | revertible_caf_list = END_OF_STATIC_LIST; |
|---|
| 171 | |
|---|
| 172 | /* initialise the allocate() interface */ |
|---|
| 173 | large_alloc_lim = RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W; |
|---|
| 174 | |
|---|
| 175 | exec_block = NULL; |
|---|
| 176 | |
|---|
| 177 | #ifdef THREADED_RTS |
|---|
| 178 | initSpinLock(&gc_alloc_block_sync); |
|---|
| 179 | whitehole_spin = 0; |
|---|
| 180 | #endif |
|---|
| 181 | |
|---|
| 182 | N = 0; |
|---|
| 183 | |
|---|
| 184 | storageAddCapabilities(0, n_capabilities); |
|---|
| 185 | |
|---|
| 186 | IF_DEBUG(gc, statDescribeGens()); |
|---|
| 187 | |
|---|
| 188 | RELEASE_SM_LOCK; |
|---|
| 189 | |
|---|
| 190 | traceEventHeapInfo(CAPSET_HEAP_DEFAULT, |
|---|
| 191 | RtsFlags.GcFlags.generations, |
|---|
| 192 | RtsFlags.GcFlags.maxHeapSize * BLOCK_SIZE_W * sizeof(W_), |
|---|
| 193 | RtsFlags.GcFlags.minAllocAreaSize * BLOCK_SIZE_W * sizeof(W_), |
|---|
| 194 | MBLOCK_SIZE_W * sizeof(W_), |
|---|
| 195 | BLOCK_SIZE_W * sizeof(W_)); |
|---|
| 196 | } |
|---|
| 197 | |
|---|
| 198 | void storageAddCapabilities (nat from, nat to) |
|---|
| 199 | { |
|---|
| 200 | nat n, g, i; |
|---|
| 201 | |
|---|
| 202 | if (from > 0) { |
|---|
| 203 | nurseries = stgReallocBytes(nurseries, to * sizeof(struct nursery_), |
|---|
| 204 | "storageAddCapabilities"); |
|---|
| 205 | } else { |
|---|
| 206 | nurseries = stgMallocBytes(to * sizeof(struct nursery_), |
|---|
| 207 | "storageAddCapabilities"); |
|---|
| 208 | } |
|---|
| 209 | |
|---|
| 210 | // we've moved the nurseries, so we have to update the rNursery |
|---|
| 211 | // pointers from the Capabilities. |
|---|
| 212 | for (i = 0; i < to; i++) { |
|---|
| 213 | capabilities[i].r.rNursery = &nurseries[i]; |
|---|
| 214 | } |
|---|
| 215 | |
|---|
| 216 | /* The allocation area. Policy: keep the allocation area |
|---|
| 217 | * small to begin with, even if we have a large suggested heap |
|---|
| 218 | * size. Reason: we're going to do a major collection first, and we |
|---|
| 219 | * don't want it to be a big one. This vague idea is borne out by |
|---|
| 220 | * rigorous experimental evidence. |
|---|
| 221 | */ |
|---|
| 222 | allocNurseries(from, to); |
|---|
| 223 | |
|---|
| 224 | // allocate a block for each mut list |
|---|
| 225 | for (n = from; n < to; n++) { |
|---|
| 226 | for (g = 1; g < RtsFlags.GcFlags.generations; g++) { |
|---|
| 227 | capabilities[n].mut_lists[g] = allocBlock(); |
|---|
| 228 | } |
|---|
| 229 | } |
|---|
| 230 | |
|---|
| 231 | initGcThreads(from, to); |
|---|
| 232 | } |
|---|
| 233 | |
|---|
| 234 | |
|---|
| 235 | void |
|---|
| 236 | exitStorage (void) |
|---|
| 237 | { |
|---|
| 238 | lnat allocated = updateNurseriesStats(); |
|---|
| 239 | stat_exit(allocated); |
|---|
| 240 | } |
|---|
| 241 | |
|---|
| 242 | void |
|---|
| 243 | freeStorage (rtsBool free_heap) |
|---|
| 244 | { |
|---|
| 245 | stgFree(generations); |
|---|
| 246 | if (free_heap) freeAllMBlocks(); |
|---|
| 247 | #if defined(THREADED_RTS) |
|---|
| 248 | closeMutex(&sm_mutex); |
|---|
| 249 | #endif |
|---|
| 250 | stgFree(nurseries); |
|---|
| 251 | freeGcThreads(); |
|---|
| 252 | } |
|---|
| 253 | |
|---|
| 254 | /* ----------------------------------------------------------------------------- |
|---|
| 255 | CAF management. |
|---|
| 256 | |
|---|
| 257 | The entry code for every CAF does the following: |
|---|
| 258 | |
|---|
| 259 | - builds a CAF_BLACKHOLE in the heap |
|---|
| 260 | |
|---|
| 261 | - calls newCaf, which atomically updates the CAF with |
|---|
| 262 | IND_STATIC pointing to the CAF_BLACKHOLE |
|---|
| 263 | |
|---|
| 264 | - if newCaf returns zero, it re-enters the CAF (see Note [atomic |
|---|
| 265 | CAF entry]) |
|---|
| 266 | |
|---|
| 267 | - pushes an update frame pointing to the CAF_BLACKHOLE |
|---|
| 268 | |
|---|
| 269 | Why do we build an BLACKHOLE in the heap rather than just updating |
|---|
| 270 | the thunk directly? It's so that we only need one kind of update |
|---|
| 271 | frame - otherwise we'd need a static version of the update frame |
|---|
| 272 | too, and various other parts of the RTS that deal with update |
|---|
| 273 | frames would also need special cases for static update frames. |
|---|
| 274 | |
|---|
| 275 | newCaf() does the following: |
|---|
| 276 | |
|---|
| 277 | - it updates the CAF with an IND_STATIC pointing to the |
|---|
| 278 | CAF_BLACKHOLE, atomically. |
|---|
| 279 | |
|---|
| 280 | - it puts the CAF on the oldest generation's mutable list. |
|---|
| 281 | This is so that we treat the CAF as a root when collecting |
|---|
| 282 | younger generations. |
|---|
| 283 | |
|---|
| 284 | ------------------ |
|---|
| 285 | Note [atomic CAF entry] |
|---|
| 286 | |
|---|
| 287 | With THREADED_RTS, newCaf() is required to be atomic (see |
|---|
| 288 | #5558). This is because if two threads happened to enter the same |
|---|
| 289 | CAF simultaneously, they would create two distinct CAF_BLACKHOLEs, |
|---|
| 290 | and so the normal threadPaused() machinery for detecting duplicate |
|---|
| 291 | evaluation will not detect this. Hence in lockCAF() below, we |
|---|
| 292 | atomically lock the CAF with WHITEHOLE before updating it with |
|---|
| 293 | IND_STATIC, and return zero if another thread locked the CAF first. |
|---|
| 294 | In the event that we lost the race, CAF entry code will re-enter |
|---|
| 295 | the CAF and block on the other thread's CAF_BLACKHOLE. |
|---|
| 296 | |
|---|
| 297 | ------------------ |
|---|
| 298 | Note [GHCi CAFs] |
|---|
| 299 | |
|---|
| 300 | For GHCI, we have additional requirements when dealing with CAFs: |
|---|
| 301 | |
|---|
| 302 | - we must *retain* all dynamically-loaded CAFs ever entered, |
|---|
| 303 | just in case we need them again. |
|---|
| 304 | - we must be able to *revert* CAFs that have been evaluated, to |
|---|
| 305 | their pre-evaluated form. |
|---|
| 306 | |
|---|
| 307 | To do this, we use an additional CAF list. When newCaf() is |
|---|
| 308 | called on a dynamically-loaded CAF, we add it to the CAF list |
|---|
| 309 | instead of the old-generation mutable list, and save away its |
|---|
| 310 | old info pointer (in caf->saved_info) for later reversion. |
|---|
| 311 | |
|---|
| 312 | To revert all the CAFs, we traverse the CAF list and reset the |
|---|
| 313 | info pointer to caf->saved_info, then throw away the CAF list. |
|---|
| 314 | (see GC.c:revertCAFs()). |
|---|
| 315 | |
|---|
| 316 | -- SDM 29/1/01 |
|---|
| 317 | |
|---|
| 318 | -------------------------------------------------------------------------- */ |
|---|
| 319 | |
|---|
| 320 | STATIC_INLINE StgWord lockCAF (StgClosure *caf, StgClosure *bh) |
|---|
| 321 | { |
|---|
| 322 | const StgInfoTable *orig_info; |
|---|
| 323 | |
|---|
| 324 | orig_info = caf->header.info; |
|---|
| 325 | |
|---|
| 326 | #ifdef THREADED_RTS |
|---|
| 327 | const StgInfoTable *cur_info; |
|---|
| 328 | |
|---|
| 329 | if (orig_info == &stg_IND_STATIC_info || |
|---|
| 330 | orig_info == &stg_WHITEHOLE_info) { |
|---|
| 331 | // already claimed by another thread; re-enter the CAF |
|---|
| 332 | return 0; |
|---|
| 333 | } |
|---|
| 334 | |
|---|
| 335 | cur_info = (const StgInfoTable *) |
|---|
| 336 | cas((StgVolatilePtr)&caf->header.info, |
|---|
| 337 | (StgWord)orig_info, |
|---|
| 338 | (StgWord)&stg_WHITEHOLE_info); |
|---|
| 339 | |
|---|
| 340 | if (cur_info != orig_info) { |
|---|
| 341 | // already claimed by another thread; re-enter the CAF |
|---|
| 342 | return 0; |
|---|
| 343 | } |
|---|
| 344 | |
|---|
| 345 | // successfully claimed by us; overwrite with IND_STATIC |
|---|
| 346 | #endif |
|---|
| 347 | |
|---|
| 348 | // For the benefit of revertCAFs(), save the original info pointer |
|---|
| 349 | ((StgIndStatic *)caf)->saved_info = orig_info; |
|---|
| 350 | |
|---|
| 351 | ((StgIndStatic*)caf)->indirectee = bh; |
|---|
| 352 | write_barrier(); |
|---|
| 353 | SET_INFO(caf,&stg_IND_STATIC_info); |
|---|
| 354 | |
|---|
| 355 | return 1; |
|---|
| 356 | } |
|---|
| 357 | |
|---|
| 358 | StgWord |
|---|
| 359 | newCAF(StgRegTable *reg, StgClosure *caf, StgClosure *bh) |
|---|
| 360 | { |
|---|
| 361 | if (lockCAF(caf,bh) == 0) return 0; |
|---|
| 362 | |
|---|
| 363 | if(keepCAFs) |
|---|
| 364 | { |
|---|
| 365 | // HACK: |
|---|
| 366 | // If we are in GHCi _and_ we are using dynamic libraries, |
|---|
| 367 | // then we can't redirect newCAF calls to newDynCAF (see below), |
|---|
| 368 | // so we make newCAF behave almost like newDynCAF. |
|---|
| 369 | // The dynamic libraries might be used by both the interpreted |
|---|
| 370 | // program and GHCi itself, so they must not be reverted. |
|---|
| 371 | // This also means that in GHCi with dynamic libraries, CAFs are not |
|---|
| 372 | // garbage collected. If this turns out to be a problem, we could |
|---|
| 373 | // do another hack here and do an address range test on caf to figure |
|---|
| 374 | // out whether it is from a dynamic library. |
|---|
| 375 | |
|---|
| 376 | ACQUIRE_SM_LOCK; // caf_list is global, locked by sm_mutex |
|---|
| 377 | ((StgIndStatic *)caf)->static_link = caf_list; |
|---|
| 378 | caf_list = caf; |
|---|
| 379 | RELEASE_SM_LOCK; |
|---|
| 380 | } |
|---|
| 381 | else |
|---|
| 382 | { |
|---|
| 383 | // Put this CAF on the mutable list for the old generation. |
|---|
| 384 | ((StgIndStatic *)caf)->saved_info = NULL; |
|---|
| 385 | if (oldest_gen->no != 0) { |
|---|
| 386 | recordMutableCap(caf, regTableToCapability(reg), oldest_gen->no); |
|---|
| 387 | } |
|---|
| 388 | } |
|---|
| 389 | return 1; |
|---|
| 390 | } |
|---|
| 391 | |
|---|
| 392 | // External API for setting the keepCAFs flag. see #3900. |
|---|
| 393 | void |
|---|
| 394 | setKeepCAFs (void) |
|---|
| 395 | { |
|---|
| 396 | keepCAFs = 1; |
|---|
| 397 | } |
|---|
| 398 | |
|---|
| 399 | // An alternate version of newCaf which is used for dynamically loaded |
|---|
| 400 | // object code in GHCi. In this case we want to retain *all* CAFs in |
|---|
| 401 | // the object code, because they might be demanded at any time from an |
|---|
| 402 | // expression evaluated on the command line. |
|---|
| 403 | // Also, GHCi might want to revert CAFs, so we add these to the |
|---|
| 404 | // revertible_caf_list. |
|---|
| 405 | // |
|---|
| 406 | // The linker hackily arranges that references to newCaf from dynamic |
|---|
| 407 | // code end up pointing to newDynCAF. |
|---|
| 408 | StgWord |
|---|
| 409 | newDynCAF (StgRegTable *reg STG_UNUSED, StgClosure *caf, StgClosure *bh) |
|---|
| 410 | { |
|---|
| 411 | if (lockCAF(caf,bh) == 0) return 0; |
|---|
| 412 | |
|---|
| 413 | ACQUIRE_SM_LOCK; |
|---|
| 414 | |
|---|
| 415 | ((StgIndStatic *)caf)->static_link = revertible_caf_list; |
|---|
| 416 | revertible_caf_list = caf; |
|---|
| 417 | |
|---|
| 418 | RELEASE_SM_LOCK; |
|---|
| 419 | |
|---|
| 420 | return 1; |
|---|
| 421 | } |
|---|
| 422 | |
|---|
| 423 | /* ----------------------------------------------------------------------------- |
|---|
| 424 | Nursery management. |
|---|
| 425 | -------------------------------------------------------------------------- */ |
|---|
| 426 | |
|---|
| 427 | static bdescr * |
|---|
| 428 | allocNursery (bdescr *tail, nat blocks) |
|---|
| 429 | { |
|---|
| 430 | bdescr *bd = NULL; |
|---|
| 431 | nat i, n; |
|---|
| 432 | |
|---|
| 433 | // We allocate the nursery as a single contiguous block and then |
|---|
| 434 | // divide it into single blocks manually. This way we guarantee |
|---|
| 435 | // that the nursery blocks are adjacent, so that the processor's |
|---|
| 436 | // automatic prefetching works across nursery blocks. This is a |
|---|
| 437 | // tiny optimisation (~0.5%), but it's free. |
|---|
| 438 | |
|---|
| 439 | while (blocks > 0) { |
|---|
| 440 | n = stg_min(blocks, BLOCKS_PER_MBLOCK); |
|---|
| 441 | blocks -= n; |
|---|
| 442 | |
|---|
| 443 | bd = allocGroup(n); |
|---|
| 444 | for (i = 0; i < n; i++) { |
|---|
| 445 | initBdescr(&bd[i], g0, g0); |
|---|
| 446 | |
|---|
| 447 | bd[i].blocks = 1; |
|---|
| 448 | bd[i].flags = 0; |
|---|
| 449 | |
|---|
| 450 | if (i > 0) { |
|---|
| 451 | bd[i].u.back = &bd[i-1]; |
|---|
| 452 | } else { |
|---|
| 453 | bd[i].u.back = NULL; |
|---|
| 454 | } |
|---|
| 455 | |
|---|
| 456 | if (i+1 < n) { |
|---|
| 457 | bd[i].link = &bd[i+1]; |
|---|
| 458 | } else { |
|---|
| 459 | bd[i].link = tail; |
|---|
| 460 | if (tail != NULL) { |
|---|
| 461 | tail->u.back = &bd[i]; |
|---|
| 462 | } |
|---|
| 463 | } |
|---|
| 464 | |
|---|
| 465 | bd[i].free = bd[i].start; |
|---|
| 466 | } |
|---|
| 467 | |
|---|
| 468 | tail = &bd[0]; |
|---|
| 469 | } |
|---|
| 470 | |
|---|
| 471 | return &bd[0]; |
|---|
| 472 | } |
|---|
| 473 | |
|---|
| 474 | static void |
|---|
| 475 | assignNurseriesToCapabilities (nat from, nat to) |
|---|
| 476 | { |
|---|
| 477 | nat i; |
|---|
| 478 | |
|---|
| 479 | for (i = from; i < to; i++) { |
|---|
| 480 | capabilities[i].r.rCurrentNursery = nurseries[i].blocks; |
|---|
| 481 | capabilities[i].r.rCurrentAlloc = NULL; |
|---|
| 482 | } |
|---|
| 483 | } |
|---|
| 484 | |
|---|
| 485 | static void |
|---|
| 486 | allocNurseries (nat from, nat to) |
|---|
| 487 | { |
|---|
| 488 | nat i; |
|---|
| 489 | |
|---|
| 490 | for (i = from; i < to; i++) { |
|---|
| 491 | nurseries[i].blocks = |
|---|
| 492 | allocNursery(NULL, RtsFlags.GcFlags.minAllocAreaSize); |
|---|
| 493 | nurseries[i].n_blocks = |
|---|
| 494 | RtsFlags.GcFlags.minAllocAreaSize; |
|---|
| 495 | } |
|---|
| 496 | assignNurseriesToCapabilities(from, to); |
|---|
| 497 | } |
|---|
| 498 | |
|---|
| 499 | lnat // words allocated |
|---|
| 500 | clearNurseries (void) |
|---|
| 501 | { |
|---|
| 502 | lnat allocated = 0; |
|---|
| 503 | nat i; |
|---|
| 504 | bdescr *bd; |
|---|
| 505 | |
|---|
| 506 | for (i = 0; i < n_capabilities; i++) { |
|---|
| 507 | for (bd = nurseries[i].blocks; bd; bd = bd->link) { |
|---|
| 508 | allocated += (lnat)(bd->free - bd->start); |
|---|
| 509 | capabilities[i].total_allocated += (lnat)(bd->free - bd->start); |
|---|
| 510 | bd->free = bd->start; |
|---|
| 511 | ASSERT(bd->gen_no == 0); |
|---|
| 512 | ASSERT(bd->gen == g0); |
|---|
| 513 | IF_DEBUG(sanity,memset(bd->start, 0xaa, BLOCK_SIZE)); |
|---|
| 514 | } |
|---|
| 515 | } |
|---|
| 516 | |
|---|
| 517 | return allocated; |
|---|
| 518 | } |
|---|
| 519 | |
|---|
| 520 | void |
|---|
| 521 | resetNurseries (void) |
|---|
| 522 | { |
|---|
| 523 | assignNurseriesToCapabilities(0, n_capabilities); |
|---|
| 524 | } |
|---|
| 525 | |
|---|
| 526 | lnat |
|---|
| 527 | countNurseryBlocks (void) |
|---|
| 528 | { |
|---|
| 529 | nat i; |
|---|
| 530 | lnat blocks = 0; |
|---|
| 531 | |
|---|
| 532 | for (i = 0; i < n_capabilities; i++) { |
|---|
| 533 | blocks += nurseries[i].n_blocks; |
|---|
| 534 | } |
|---|
| 535 | return blocks; |
|---|
| 536 | } |
|---|
| 537 | |
|---|
| 538 | static void |
|---|
| 539 | resizeNursery (nursery *nursery, nat blocks) |
|---|
| 540 | { |
|---|
| 541 | bdescr *bd; |
|---|
| 542 | nat nursery_blocks; |
|---|
| 543 | |
|---|
| 544 | nursery_blocks = nursery->n_blocks; |
|---|
| 545 | if (nursery_blocks == blocks) return; |
|---|
| 546 | |
|---|
| 547 | if (nursery_blocks < blocks) { |
|---|
| 548 | debugTrace(DEBUG_gc, "increasing size of nursery to %d blocks", |
|---|
| 549 | blocks); |
|---|
| 550 | nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); |
|---|
| 551 | } |
|---|
| 552 | else { |
|---|
| 553 | bdescr *next_bd; |
|---|
| 554 | |
|---|
| 555 | debugTrace(DEBUG_gc, "decreasing size of nursery to %d blocks", |
|---|
| 556 | blocks); |
|---|
| 557 | |
|---|
| 558 | bd = nursery->blocks; |
|---|
| 559 | while (nursery_blocks > blocks) { |
|---|
| 560 | next_bd = bd->link; |
|---|
| 561 | next_bd->u.back = NULL; |
|---|
| 562 | nursery_blocks -= bd->blocks; // might be a large block |
|---|
| 563 | freeGroup(bd); |
|---|
| 564 | bd = next_bd; |
|---|
| 565 | } |
|---|
| 566 | nursery->blocks = bd; |
|---|
| 567 | // might have gone just under, by freeing a large block, so make |
|---|
| 568 | // up the difference. |
|---|
| 569 | if (nursery_blocks < blocks) { |
|---|
| 570 | nursery->blocks = allocNursery(nursery->blocks, blocks-nursery_blocks); |
|---|
| 571 | } |
|---|
| 572 | } |
|---|
| 573 | |
|---|
| 574 | nursery->n_blocks = blocks; |
|---|
| 575 | ASSERT(countBlocks(nursery->blocks) == nursery->n_blocks); |
|---|
| 576 | } |
|---|
| 577 | |
|---|
| 578 | // |
|---|
| 579 | // Resize each of the nurseries to the specified size. |
|---|
| 580 | // |
|---|
| 581 | void |
|---|
| 582 | resizeNurseriesFixed (nat blocks) |
|---|
| 583 | { |
|---|
| 584 | nat i; |
|---|
| 585 | for (i = 0; i < n_capabilities; i++) { |
|---|
| 586 | resizeNursery(&nurseries[i], blocks); |
|---|
| 587 | } |
|---|
| 588 | } |
|---|
| 589 | |
|---|
| 590 | // |
|---|
| 591 | // Resize the nurseries to the total specified size. |
|---|
| 592 | // |
|---|
| 593 | void |
|---|
| 594 | resizeNurseries (nat blocks) |
|---|
| 595 | { |
|---|
| 596 | // If there are multiple nurseries, then we just divide the number |
|---|
| 597 | // of available blocks between them. |
|---|
| 598 | resizeNurseriesFixed(blocks / n_capabilities); |
|---|
| 599 | } |
|---|
| 600 | |
|---|
| 601 | |
|---|
| 602 | /* ----------------------------------------------------------------------------- |
|---|
| 603 | move_STACK is called to update the TSO structure after it has been |
|---|
| 604 | moved from one place to another. |
|---|
| 605 | -------------------------------------------------------------------------- */ |
|---|
| 606 | |
|---|
| 607 | void |
|---|
| 608 | move_STACK (StgStack *src, StgStack *dest) |
|---|
| 609 | { |
|---|
| 610 | ptrdiff_t diff; |
|---|
| 611 | |
|---|
| 612 | // relocate the stack pointer... |
|---|
| 613 | diff = (StgPtr)dest - (StgPtr)src; // In *words* |
|---|
| 614 | dest->sp = (StgPtr)dest->sp + diff; |
|---|
| 615 | } |
|---|
| 616 | |
|---|
| 617 | /* ----------------------------------------------------------------------------- |
|---|
| 618 | allocate() |
|---|
| 619 | |
|---|
| 620 | This allocates memory in the current thread - it is intended for |
|---|
| 621 | use primarily from STG-land where we have a Capability. It is |
|---|
| 622 | better than allocate() because it doesn't require taking the |
|---|
| 623 | sm_mutex lock in the common case. |
|---|
| 624 | |
|---|
| 625 | Memory is allocated directly from the nursery if possible (but not |
|---|
| 626 | from the current nursery block, so as not to interfere with |
|---|
| 627 | Hp/HpLim). |
|---|
| 628 | -------------------------------------------------------------------------- */ |
|---|
| 629 | |
|---|
| 630 | StgPtr |
|---|
| 631 | allocate (Capability *cap, lnat n) |
|---|
| 632 | { |
|---|
| 633 | bdescr *bd; |
|---|
| 634 | StgPtr p; |
|---|
| 635 | |
|---|
| 636 | TICK_ALLOC_HEAP_NOCTR(n); |
|---|
| 637 | CCS_ALLOC(cap->r.rCCCS,n); |
|---|
| 638 | |
|---|
| 639 | if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { |
|---|
| 640 | lnat req_blocks = (lnat)BLOCK_ROUND_UP(n*sizeof(W_)) / BLOCK_SIZE; |
|---|
| 641 | |
|---|
| 642 | // Attempting to allocate an object larger than maxHeapSize |
|---|
| 643 | // should definitely be disallowed. (bug #1791) |
|---|
| 644 | if ((RtsFlags.GcFlags.maxHeapSize > 0 && |
|---|
| 645 | req_blocks >= RtsFlags.GcFlags.maxHeapSize) || |
|---|
| 646 | req_blocks >= HS_INT32_MAX) // avoid overflow when |
|---|
| 647 | // calling allocGroup() below |
|---|
| 648 | { |
|---|
| 649 | heapOverflow(); |
|---|
| 650 | // heapOverflow() doesn't exit (see #2592), but we aren't |
|---|
| 651 | // in a position to do a clean shutdown here: we |
|---|
| 652 | // either have to allocate the memory or exit now. |
|---|
| 653 | // Allocating the memory would be bad, because the user |
|---|
| 654 | // has requested that we not exceed maxHeapSize, so we |
|---|
| 655 | // just exit. |
|---|
| 656 | stg_exit(EXIT_HEAPOVERFLOW); |
|---|
| 657 | } |
|---|
| 658 | |
|---|
| 659 | ACQUIRE_SM_LOCK |
|---|
| 660 | bd = allocGroup(req_blocks); |
|---|
| 661 | dbl_link_onto(bd, &g0->large_objects); |
|---|
| 662 | g0->n_large_blocks += bd->blocks; // might be larger than req_blocks |
|---|
| 663 | g0->n_new_large_words += n; |
|---|
| 664 | RELEASE_SM_LOCK; |
|---|
| 665 | initBdescr(bd, g0, g0); |
|---|
| 666 | bd->flags = BF_LARGE; |
|---|
| 667 | bd->free = bd->start + n; |
|---|
| 668 | cap->total_allocated += n; |
|---|
| 669 | return bd->start; |
|---|
| 670 | } |
|---|
| 671 | |
|---|
| 672 | /* small allocation (<LARGE_OBJECT_THRESHOLD) */ |
|---|
| 673 | |
|---|
| 674 | bd = cap->r.rCurrentAlloc; |
|---|
| 675 | if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { |
|---|
| 676 | |
|---|
| 677 | // The CurrentAlloc block is full, we need to find another |
|---|
| 678 | // one. First, we try taking the next block from the |
|---|
| 679 | // nursery: |
|---|
| 680 | bd = cap->r.rCurrentNursery->link; |
|---|
| 681 | |
|---|
| 682 | if (bd == NULL || bd->free + n > bd->start + BLOCK_SIZE_W) { |
|---|
| 683 | // The nursery is empty, or the next block is already |
|---|
| 684 | // full: allocate a fresh block (we can't fail here). |
|---|
| 685 | ACQUIRE_SM_LOCK; |
|---|
| 686 | bd = allocBlock(); |
|---|
| 687 | cap->r.rNursery->n_blocks++; |
|---|
| 688 | RELEASE_SM_LOCK; |
|---|
| 689 | initBdescr(bd, g0, g0); |
|---|
| 690 | bd->flags = 0; |
|---|
| 691 | // If we had to allocate a new block, then we'll GC |
|---|
| 692 | // pretty quickly now, because MAYBE_GC() will |
|---|
| 693 | // notice that CurrentNursery->link is NULL. |
|---|
| 694 | } else { |
|---|
| 695 | // we have a block in the nursery: take it and put |
|---|
| 696 | // it at the *front* of the nursery list, and use it |
|---|
| 697 | // to allocate() from. |
|---|
| 698 | cap->r.rCurrentNursery->link = bd->link; |
|---|
| 699 | if (bd->link != NULL) { |
|---|
| 700 | bd->link->u.back = cap->r.rCurrentNursery; |
|---|
| 701 | } |
|---|
| 702 | } |
|---|
| 703 | dbl_link_onto(bd, &cap->r.rNursery->blocks); |
|---|
| 704 | cap->r.rCurrentAlloc = bd; |
|---|
| 705 | IF_DEBUG(sanity, checkNurserySanity(cap->r.rNursery)); |
|---|
| 706 | } |
|---|
| 707 | p = bd->free; |
|---|
| 708 | bd->free += n; |
|---|
| 709 | |
|---|
| 710 | IF_DEBUG(sanity, ASSERT(*((StgWord8*)p) == 0xaa)); |
|---|
| 711 | return p; |
|---|
| 712 | } |
|---|
| 713 | |
|---|
| 714 | /* --------------------------------------------------------------------------- |
|---|
| 715 | Allocate a fixed/pinned object. |
|---|
| 716 | |
|---|
| 717 | We allocate small pinned objects into a single block, allocating a |
|---|
| 718 | new block when the current one overflows. The block is chained |
|---|
| 719 | onto the large_object_list of generation 0. |
|---|
| 720 | |
|---|
| 721 | NOTE: The GC can't in general handle pinned objects. This |
|---|
| 722 | interface is only safe to use for ByteArrays, which have no |
|---|
| 723 | pointers and don't require scavenging. It works because the |
|---|
| 724 | block's descriptor has the BF_LARGE flag set, so the block is |
|---|
| 725 | treated as a large object and chained onto various lists, rather |
|---|
| 726 | than the individual objects being copied. However, when it comes |
|---|
| 727 | to scavenge the block, the GC will only scavenge the first object. |
|---|
| 728 | The reason is that the GC can't linearly scan a block of pinned |
|---|
| 729 | objects at the moment (doing so would require using the |
|---|
| 730 | mostly-copying techniques). But since we're restricting ourselves |
|---|
| 731 | to pinned ByteArrays, not scavenging is ok. |
|---|
| 732 | |
|---|
| 733 | This function is called by newPinnedByteArray# which immediately |
|---|
| 734 | fills the allocated memory with a MutableByteArray#. |
|---|
| 735 | ------------------------------------------------------------------------- */ |
|---|
| 736 | |
|---|
| 737 | StgPtr |
|---|
| 738 | allocatePinned (Capability *cap, lnat n) |
|---|
| 739 | { |
|---|
| 740 | StgPtr p; |
|---|
| 741 | bdescr *bd; |
|---|
| 742 | |
|---|
| 743 | // If the request is for a large object, then allocate() |
|---|
| 744 | // will give us a pinned object anyway. |
|---|
| 745 | if (n >= LARGE_OBJECT_THRESHOLD/sizeof(W_)) { |
|---|
| 746 | p = allocate(cap, n); |
|---|
| 747 | Bdescr(p)->flags |= BF_PINNED; |
|---|
| 748 | return p; |
|---|
| 749 | } |
|---|
| 750 | |
|---|
| 751 | TICK_ALLOC_HEAP_NOCTR(n); |
|---|
| 752 | CCS_ALLOC(cap->r.rCCCS,n); |
|---|
| 753 | |
|---|
| 754 | bd = cap->pinned_object_block; |
|---|
| 755 | |
|---|
| 756 | // If we don't have a block of pinned objects yet, or the current |
|---|
| 757 | // one isn't large enough to hold the new object, get a new one. |
|---|
| 758 | if (bd == NULL || (bd->free + n) > (bd->start + BLOCK_SIZE_W)) { |
|---|
| 759 | |
|---|
| 760 | // stash the old block on cap->pinned_object_blocks. On the |
|---|
| 761 | // next GC cycle these objects will be moved to |
|---|
| 762 | // g0->large_objects. |
|---|
| 763 | if (bd != NULL) { |
|---|
| 764 | dbl_link_onto(bd, &cap->pinned_object_blocks); |
|---|
| 765 | } |
|---|
| 766 | |
|---|
| 767 | // We need to find another block. We could just allocate one, |
|---|
| 768 | // but that means taking a global lock and we really want to |
|---|
| 769 | // avoid that (benchmarks that allocate a lot of pinned |
|---|
| 770 | // objects scale really badly if we do this). |
|---|
| 771 | // |
|---|
| 772 | // So first, we try taking the next block from the nursery, in |
|---|
| 773 | // the same way as allocate(), but note that we can only take |
|---|
| 774 | // an *empty* block, because we're about to mark it as |
|---|
| 775 | // BF_PINNED | BF_LARGE. |
|---|
| 776 | bd = cap->r.rCurrentNursery->link; |
|---|
| 777 | if (bd == NULL || bd->free != bd->start) { // must be empty! |
|---|
| 778 | // The nursery is empty, or the next block is non-empty: |
|---|
| 779 | // allocate a fresh block (we can't fail here). |
|---|
| 780 | |
|---|
| 781 | // XXX in the case when the next nursery block is |
|---|
| 782 | // non-empty we aren't exerting any pressure to GC soon, |
|---|
| 783 | // so if this case ever happens then we could in theory |
|---|
| 784 | // keep allocating for ever without calling the GC. We |
|---|
| 785 | // can't bump g0->n_new_large_words because that will be |
|---|
| 786 | // counted towards allocation, and we're already counting |
|---|
| 787 | // our pinned obects as allocation in |
|---|
| 788 | // collect_pinned_object_blocks in the GC. |
|---|
| 789 | ACQUIRE_SM_LOCK; |
|---|
| 790 | bd = allocBlock(); |
|---|
| 791 | RELEASE_SM_LOCK; |
|---|
| 792 | initBdescr(bd, g0, g0); |
|---|
| 793 | } else { |
|---|
| 794 | // we have a block in the nursery: steal it |
|---|
| 795 | cap->r.rCurrentNursery->link = bd->link; |
|---|
| 796 | if (bd->link != NULL) { |
|---|
| 797 | bd->link->u.back = cap->r.rCurrentNursery; |
|---|
| 798 | } |
|---|
| 799 | cap->r.rNursery->n_blocks -= bd->blocks; |
|---|
| 800 | } |
|---|
| 801 | |
|---|
| 802 | cap->pinned_object_block = bd; |
|---|
| 803 | bd->flags = BF_PINNED | BF_LARGE | BF_EVACUATED; |
|---|
| 804 | |
|---|
| 805 | // The pinned_object_block remains attached to the capability |
|---|
| 806 | // until it is full, even if a GC occurs. We want this |
|---|
| 807 | // behaviour because otherwise the unallocated portion of the |
|---|
| 808 | // block would be forever slop, and under certain workloads |
|---|
| 809 | // (allocating a few ByteStrings per GC) we accumulate a lot |
|---|
| 810 | // of slop. |
|---|
| 811 | // |
|---|
| 812 | // So, the pinned_object_block is initially marked |
|---|
| 813 | // BF_EVACUATED so the GC won't touch it. When it is full, |
|---|
| 814 | // we place it on the large_objects list, and at the start of |
|---|
| 815 | // the next GC the BF_EVACUATED flag will be cleared, and the |
|---|
| 816 | // block will be promoted as usual (if anything in it is |
|---|
| 817 | // live). |
|---|
| 818 | } |
|---|
| 819 | |
|---|
| 820 | p = bd->free; |
|---|
| 821 | bd->free += n; |
|---|
| 822 | return p; |
|---|
| 823 | } |
|---|
| 824 | |
|---|
| 825 | /* ----------------------------------------------------------------------------- |
|---|
| 826 | Write Barriers |
|---|
| 827 | -------------------------------------------------------------------------- */ |
|---|
| 828 | |
|---|
| 829 | /* |
|---|
| 830 | This is the write barrier for MUT_VARs, a.k.a. IORefs. A |
|---|
| 831 | MUT_VAR_CLEAN object is not on the mutable list; a MUT_VAR_DIRTY |
|---|
| 832 | is. When written to, a MUT_VAR_CLEAN turns into a MUT_VAR_DIRTY |
|---|
| 833 | and is put on the mutable list. |
|---|
| 834 | */ |
|---|
| 835 | void |
|---|
| 836 | dirty_MUT_VAR(StgRegTable *reg, StgClosure *p) |
|---|
| 837 | { |
|---|
| 838 | Capability *cap = regTableToCapability(reg); |
|---|
| 839 | if (p->header.info == &stg_MUT_VAR_CLEAN_info) { |
|---|
| 840 | p->header.info = &stg_MUT_VAR_DIRTY_info; |
|---|
| 841 | recordClosureMutated(cap,p); |
|---|
| 842 | } |
|---|
| 843 | } |
|---|
| 844 | |
|---|
| 845 | // Setting a TSO's link field with a write barrier. |
|---|
| 846 | // It is *not* necessary to call this function when |
|---|
| 847 | // * setting the link field to END_TSO_QUEUE |
|---|
| 848 | // * putting a TSO on the blackhole_queue |
|---|
| 849 | // * setting the link field of the currently running TSO, as it |
|---|
| 850 | // will already be dirty. |
|---|
| 851 | void |
|---|
| 852 | setTSOLink (Capability *cap, StgTSO *tso, StgTSO *target) |
|---|
| 853 | { |
|---|
| 854 | if (tso->dirty == 0) { |
|---|
| 855 | tso->dirty = 1; |
|---|
| 856 | recordClosureMutated(cap,(StgClosure*)tso); |
|---|
| 857 | } |
|---|
| 858 | tso->_link = target; |
|---|
| 859 | } |
|---|
| 860 | |
|---|
| 861 | void |
|---|
| 862 | setTSOPrev (Capability *cap, StgTSO *tso, StgTSO *target) |
|---|
| 863 | { |
|---|
| 864 | if (tso->dirty == 0) { |
|---|
| 865 | tso->dirty = 1; |
|---|
| 866 | recordClosureMutated(cap,(StgClosure*)tso); |
|---|
| 867 | } |
|---|
| 868 | tso->block_info.prev = target; |
|---|
| 869 | } |
|---|
| 870 | |
|---|
| 871 | void |
|---|
| 872 | dirty_TSO (Capability *cap, StgTSO *tso) |
|---|
| 873 | { |
|---|
| 874 | if (tso->dirty == 0) { |
|---|
| 875 | tso->dirty = 1; |
|---|
| 876 | recordClosureMutated(cap,(StgClosure*)tso); |
|---|
| 877 | } |
|---|
| 878 | } |
|---|
| 879 | |
|---|
| 880 | void |
|---|
| 881 | dirty_STACK (Capability *cap, StgStack *stack) |
|---|
| 882 | { |
|---|
| 883 | if (stack->dirty == 0) { |
|---|
| 884 | stack->dirty = 1; |
|---|
| 885 | recordClosureMutated(cap,(StgClosure*)stack); |
|---|
| 886 | } |
|---|
| 887 | } |
|---|
| 888 | |
|---|
| 889 | /* |
|---|
| 890 | This is the write barrier for MVARs. An MVAR_CLEAN objects is not |
|---|
| 891 | on the mutable list; a MVAR_DIRTY is. When written to, a |
|---|
| 892 | MVAR_CLEAN turns into a MVAR_DIRTY and is put on the mutable list. |
|---|
| 893 | The check for MVAR_CLEAN is inlined at the call site for speed, |
|---|
| 894 | this really does make a difference on concurrency-heavy benchmarks |
|---|
| 895 | such as Chaneneos and cheap-concurrency. |
|---|
| 896 | */ |
|---|
| 897 | void |
|---|
| 898 | dirty_MVAR(StgRegTable *reg, StgClosure *p) |
|---|
| 899 | { |
|---|
| 900 | recordClosureMutated(regTableToCapability(reg),p); |
|---|
| 901 | } |
|---|
| 902 | |
|---|
| 903 | /* ----------------------------------------------------------------------------- |
|---|
| 904 | * Stats and stuff |
|---|
| 905 | * -------------------------------------------------------------------------- */ |
|---|
| 906 | |
|---|
| 907 | /* ----------------------------------------------------------------------------- |
|---|
| 908 | * updateNurseriesStats() |
|---|
| 909 | * |
|---|
| 910 | * Update the per-cap total_allocated numbers with an approximation of |
|---|
| 911 | * the amount of memory used in each cap's nursery. Also return the |
|---|
| 912 | * total across all caps. |
|---|
| 913 | * |
|---|
| 914 | * Since this update is also performed by clearNurseries() then we only |
|---|
| 915 | * need this function for the final stats when the RTS is shutting down. |
|---|
| 916 | * -------------------------------------------------------------------------- */ |
|---|
| 917 | |
|---|
| 918 | lnat |
|---|
| 919 | updateNurseriesStats (void) |
|---|
| 920 | { |
|---|
| 921 | lnat allocated = 0; |
|---|
| 922 | nat i; |
|---|
| 923 | |
|---|
| 924 | for (i = 0; i < n_capabilities; i++) { |
|---|
| 925 | int cap_allocated = countOccupied(nurseries[i].blocks); |
|---|
| 926 | capabilities[i].total_allocated += cap_allocated; |
|---|
| 927 | allocated += cap_allocated; |
|---|
| 928 | } |
|---|
| 929 | |
|---|
| 930 | return allocated; |
|---|
| 931 | } |
|---|
| 932 | |
|---|
| 933 | lnat |
|---|
| 934 | countLargeAllocated (void) |
|---|
| 935 | { |
|---|
| 936 | return g0->n_new_large_words; |
|---|
| 937 | } |
|---|
| 938 | |
|---|
| 939 | lnat countOccupied (bdescr *bd) |
|---|
| 940 | { |
|---|
| 941 | lnat words; |
|---|
| 942 | |
|---|
| 943 | words = 0; |
|---|
| 944 | for (; bd != NULL; bd = bd->link) { |
|---|
| 945 | ASSERT(bd->free <= bd->start + bd->blocks * BLOCK_SIZE_W); |
|---|
| 946 | words += bd->free - bd->start; |
|---|
| 947 | } |
|---|
| 948 | return words; |
|---|
| 949 | } |
|---|
| 950 | |
|---|
| 951 | lnat genLiveWords (generation *gen) |
|---|
| 952 | { |
|---|
| 953 | return gen->n_words + countOccupied(gen->large_objects); |
|---|
| 954 | } |
|---|
| 955 | |
|---|
| 956 | lnat genLiveBlocks (generation *gen) |
|---|
| 957 | { |
|---|
| 958 | return gen->n_blocks + gen->n_large_blocks; |
|---|
| 959 | } |
|---|
| 960 | |
|---|
| 961 | lnat gcThreadLiveWords (nat i, nat g) |
|---|
| 962 | { |
|---|
| 963 | lnat words; |
|---|
| 964 | |
|---|
| 965 | words = countOccupied(gc_threads[i]->gens[g].todo_bd); |
|---|
| 966 | words += countOccupied(gc_threads[i]->gens[g].part_list); |
|---|
| 967 | words += countOccupied(gc_threads[i]->gens[g].scavd_list); |
|---|
| 968 | |
|---|
| 969 | return words; |
|---|
| 970 | } |
|---|
| 971 | |
|---|
| 972 | lnat gcThreadLiveBlocks (nat i, nat g) |
|---|
| 973 | { |
|---|
| 974 | lnat blocks; |
|---|
| 975 | |
|---|
| 976 | blocks = countBlocks(gc_threads[i]->gens[g].todo_bd); |
|---|
| 977 | blocks += gc_threads[i]->gens[g].n_part_blocks; |
|---|
| 978 | blocks += gc_threads[i]->gens[g].n_scavd_blocks; |
|---|
| 979 | |
|---|
| 980 | return blocks; |
|---|
| 981 | } |
|---|
| 982 | |
|---|
| 983 | // Return an accurate count of the live data in the heap, excluding |
|---|
| 984 | // generation 0. |
|---|
| 985 | lnat calcLiveWords (void) |
|---|
| 986 | { |
|---|
| 987 | nat g; |
|---|
| 988 | lnat live; |
|---|
| 989 | |
|---|
| 990 | live = 0; |
|---|
| 991 | for (g = 0; g < RtsFlags.GcFlags.generations; g++) { |
|---|
| 992 | live += genLiveWords(&generations[g]); |
|---|
| 993 | } |
|---|
| 994 | return live; |
|---|
| 995 | } |
|---|
| 996 | |
|---|
| 997 | lnat calcLiveBlocks (void) |
|---|
| 998 | { |
|---|
| 999 | nat g; |
|---|
| 1000 | lnat live; |
|---|
| 1001 | |
|---|
| 1002 | live = 0; |
|---|
| 1003 | for (g = 0; g < RtsFlags.GcFlags.generations; g++) { |
|---|
| 1004 | live += genLiveBlocks(&generations[g]); |
|---|
| 1005 | } |
|---|
| 1006 | return live; |
|---|
| 1007 | } |
|---|
| 1008 | |
|---|
| 1009 | /* Approximate the number of blocks that will be needed at the next |
|---|
| 1010 | * garbage collection. |
|---|
| 1011 | * |
|---|
| 1012 | * Assume: all data currently live will remain live. Generationss |
|---|
| 1013 | * that will be collected next time will therefore need twice as many |
|---|
| 1014 | * blocks since all the data will be copied. |
|---|
| 1015 | */ |
|---|
| 1016 | extern lnat |
|---|
| 1017 | calcNeeded(void) |
|---|
| 1018 | { |
|---|
| 1019 | lnat needed = 0; |
|---|
| 1020 | nat g; |
|---|
| 1021 | generation *gen; |
|---|
| 1022 | |
|---|
| 1023 | for (g = 0; g < RtsFlags.GcFlags.generations; g++) { |
|---|
| 1024 | gen = &generations[g]; |
|---|
| 1025 | |
|---|
| 1026 | // we need at least this much space |
|---|
| 1027 | needed += gen->n_blocks + gen->n_large_blocks; |
|---|
| 1028 | |
|---|
| 1029 | // any additional space needed to collect this gen next time? |
|---|
| 1030 | if (g == 0 || // always collect gen 0 |
|---|
| 1031 | (gen->n_blocks + gen->n_large_blocks > gen->max_blocks)) { |
|---|
| 1032 | // we will collect this gen next time |
|---|
| 1033 | if (gen->mark) { |
|---|
| 1034 | // bitmap: |
|---|
| 1035 | needed += gen->n_blocks / BITS_IN(W_); |
|---|
| 1036 | // mark stack: |
|---|
| 1037 | needed += gen->n_blocks / 100; |
|---|
| 1038 | } |
|---|
| 1039 | if (gen->compact) { |
|---|
| 1040 | continue; // no additional space needed for compaction |
|---|
| 1041 | } else { |
|---|
| 1042 | needed += gen->n_blocks; |
|---|
| 1043 | } |
|---|
| 1044 | } |
|---|
| 1045 | } |
|---|
| 1046 | return needed; |
|---|
| 1047 | } |
|---|
| 1048 | |
|---|
| 1049 | /* ---------------------------------------------------------------------------- |
|---|
| 1050 | Executable memory |
|---|
| 1051 | |
|---|
| 1052 | Executable memory must be managed separately from non-executable |
|---|
| 1053 | memory. Most OSs these days require you to jump through hoops to |
|---|
| 1054 | dynamically allocate executable memory, due to various security |
|---|
| 1055 | measures. |
|---|
| 1056 | |
|---|
| 1057 | Here we provide a small memory allocator for executable memory. |
|---|
| 1058 | Memory is managed with a page granularity; we allocate linearly |
|---|
| 1059 | in the page, and when the page is emptied (all objects on the page |
|---|
| 1060 | are free) we free the page again, not forgetting to make it |
|---|
| 1061 | non-executable. |
|---|
| 1062 | |
|---|
| 1063 | TODO: The inability to handle objects bigger than BLOCK_SIZE_W means that |
|---|
| 1064 | the linker cannot use allocateExec for loading object code files |
|---|
| 1065 | on Windows. Once allocateExec can handle larger objects, the linker |
|---|
| 1066 | should be modified to use allocateExec instead of VirtualAlloc. |
|---|
| 1067 | ------------------------------------------------------------------------- */ |
|---|
| 1068 | |
|---|
| 1069 | #if defined(linux_HOST_OS) |
|---|
| 1070 | |
|---|
| 1071 | // On Linux we need to use libffi for allocating executable memory, |
|---|
| 1072 | // because it knows how to work around the restrictions put in place |
|---|
| 1073 | // by SELinux. |
|---|
| 1074 | |
|---|
| 1075 | void *allocateExec (nat bytes, void **exec_ret) |
|---|
| 1076 | { |
|---|
| 1077 | void **ret, **exec; |
|---|
| 1078 | ACQUIRE_SM_LOCK; |
|---|
| 1079 | ret = ffi_closure_alloc (sizeof(void *) + (size_t)bytes, (void**)&exec); |
|---|
| 1080 | RELEASE_SM_LOCK; |
|---|
| 1081 | if (ret == NULL) return ret; |
|---|
| 1082 | *ret = ret; // save the address of the writable mapping, for freeExec(). |
|---|
| 1083 | *exec_ret = exec + 1; |
|---|
| 1084 | return (ret + 1); |
|---|
| 1085 | } |
|---|
| 1086 | |
|---|
| 1087 | // freeExec gets passed the executable address, not the writable address. |
|---|
| 1088 | void freeExec (void *addr) |
|---|
| 1089 | { |
|---|
| 1090 | void *writable; |
|---|
| 1091 | writable = *((void**)addr - 1); |
|---|
| 1092 | ACQUIRE_SM_LOCK; |
|---|
| 1093 | ffi_closure_free (writable); |
|---|
| 1094 | RELEASE_SM_LOCK |
|---|
| 1095 | } |
|---|
| 1096 | |
|---|
| 1097 | #else |
|---|
| 1098 | |
|---|
| 1099 | void *allocateExec (nat bytes, void **exec_ret) |
|---|
| 1100 | { |
|---|
| 1101 | void *ret; |
|---|
| 1102 | nat n; |
|---|
| 1103 | |
|---|
| 1104 | ACQUIRE_SM_LOCK; |
|---|
| 1105 | |
|---|
| 1106 | // round up to words. |
|---|
| 1107 | n = (bytes + sizeof(W_) + 1) / sizeof(W_); |
|---|
| 1108 | |
|---|
| 1109 | if (n+1 > BLOCK_SIZE_W) { |
|---|
| 1110 | barf("allocateExec: can't handle large objects"); |
|---|
| 1111 | } |
|---|
| 1112 | |
|---|
| 1113 | if (exec_block == NULL || |
|---|
| 1114 | exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) { |
|---|
| 1115 | bdescr *bd; |
|---|
| 1116 | lnat pagesize = getPageSize(); |
|---|
| 1117 | bd = allocGroup(stg_max(1, pagesize / BLOCK_SIZE)); |
|---|
| 1118 | debugTrace(DEBUG_gc, "allocate exec block %p", bd->start); |
|---|
| 1119 | bd->gen_no = 0; |
|---|
| 1120 | bd->flags = BF_EXEC; |
|---|
| 1121 | bd->link = exec_block; |
|---|
| 1122 | if (exec_block != NULL) { |
|---|
| 1123 | exec_block->u.back = bd; |
|---|
| 1124 | } |
|---|
| 1125 | bd->u.back = NULL; |
|---|
| 1126 | setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsTrue); |
|---|
| 1127 | exec_block = bd; |
|---|
| 1128 | } |
|---|
| 1129 | *(exec_block->free) = n; // store the size of this chunk |
|---|
| 1130 | exec_block->gen_no += n; // gen_no stores the number of words allocated |
|---|
| 1131 | ret = exec_block->free + 1; |
|---|
| 1132 | exec_block->free += n + 1; |
|---|
| 1133 | |
|---|
| 1134 | RELEASE_SM_LOCK |
|---|
| 1135 | *exec_ret = ret; |
|---|
| 1136 | return ret; |
|---|
| 1137 | } |
|---|
| 1138 | |
|---|
| 1139 | void freeExec (void *addr) |
|---|
| 1140 | { |
|---|
| 1141 | StgPtr p = (StgPtr)addr - 1; |
|---|
| 1142 | bdescr *bd = Bdescr((StgPtr)p); |
|---|
| 1143 | |
|---|
| 1144 | if ((bd->flags & BF_EXEC) == 0) { |
|---|
| 1145 | barf("freeExec: not executable"); |
|---|
| 1146 | } |
|---|
| 1147 | |
|---|
| 1148 | if (*(StgPtr)p == 0) { |
|---|
| 1149 | barf("freeExec: already free?"); |
|---|
| 1150 | } |
|---|
| 1151 | |
|---|
| 1152 | ACQUIRE_SM_LOCK; |
|---|
| 1153 | |
|---|
| 1154 | bd->gen_no -= *(StgPtr)p; |
|---|
| 1155 | *(StgPtr)p = 0; |
|---|
| 1156 | |
|---|
| 1157 | if (bd->gen_no == 0) { |
|---|
| 1158 | // Free the block if it is empty, but not if it is the block at |
|---|
| 1159 | // the head of the queue. |
|---|
| 1160 | if (bd != exec_block) { |
|---|
| 1161 | debugTrace(DEBUG_gc, "free exec block %p", bd->start); |
|---|
| 1162 | dbl_link_remove(bd, &exec_block); |
|---|
| 1163 | setExecutable(bd->start, bd->blocks * BLOCK_SIZE, rtsFalse); |
|---|
| 1164 | freeGroup(bd); |
|---|
| 1165 | } else { |
|---|
| 1166 | bd->free = bd->start; |
|---|
| 1167 | } |
|---|
| 1168 | } |
|---|
| 1169 | |
|---|
| 1170 | RELEASE_SM_LOCK |
|---|
| 1171 | } |
|---|
| 1172 | |
|---|
| 1173 | #endif /* mingw32_HOST_OS */ |
|---|
| 1174 | |
|---|
| 1175 | #ifdef DEBUG |
|---|
| 1176 | |
|---|
| 1177 | // handy function for use in gdb, because Bdescr() is inlined. |
|---|
| 1178 | extern bdescr *_bdescr (StgPtr p); |
|---|
| 1179 | |
|---|
| 1180 | bdescr * |
|---|
| 1181 | _bdescr (StgPtr p) |
|---|
| 1182 | { |
|---|
| 1183 | return Bdescr(p); |
|---|
| 1184 | } |
|---|
| 1185 | |
|---|
| 1186 | #endif |
|---|