| 1 | /* ----------------------------------------------------------------------------- |
|---|
| 2 | * Foreign export adjustor thunks |
|---|
| 3 | * |
|---|
| 4 | * Copyright (c) 1998. |
|---|
| 5 | * |
|---|
| 6 | * ---------------------------------------------------------------------------*/ |
|---|
| 7 | |
|---|
| 8 | /* A little bit of background... |
|---|
| 9 | |
|---|
| 10 | An adjustor thunk is a dynamically allocated code snippet that allows |
|---|
| 11 | Haskell closures to be viewed as C function pointers. |
|---|
| 12 | |
|---|
| 13 | Stable pointers provide a way for the outside world to get access to, |
|---|
| 14 | and evaluate, Haskell heap objects, with the RTS providing a small |
|---|
| 15 | range of ops for doing so. So, assuming we've got a stable pointer in |
|---|
| 16 | our hand in C, we can jump into the Haskell world and evaluate a callback |
|---|
| 17 | procedure, say. This works OK in some cases where callbacks are used, but |
|---|
| 18 | does require the external code to know about stable pointers and how to deal |
|---|
| 19 | with them. We'd like to hide the Haskell-nature of a callback and have it |
|---|
| 20 | be invoked just like any other C function pointer. |
|---|
| 21 | |
|---|
| 22 | Enter adjustor thunks. An adjustor thunk is a little piece of code |
|---|
| 23 | that's generated on-the-fly (one per Haskell closure being exported) |
|---|
| 24 | that, when entered using some 'universal' calling convention (e.g., the |
|---|
| 25 | C calling convention on platform X), pushes an implicit stable pointer |
|---|
| 26 | (to the Haskell callback) before calling another (static) C function stub |
|---|
| 27 | which takes care of entering the Haskell code via its stable pointer. |
|---|
| 28 | |
|---|
| 29 | An adjustor thunk is allocated on the C heap, and is called from within |
|---|
| 30 | Haskell just before handing out the function pointer to the Haskell (IO) |
|---|
| 31 | action. User code should never have to invoke it explicitly. |
|---|
| 32 | |
|---|
| 33 | An adjustor thunk differs from a C function pointer in one respect: when |
|---|
| 34 | the code is through with it, it has to be freed in order to release Haskell |
|---|
| 35 | and C resources. Failure to do so will result in memory leaks on both the C and |
|---|
| 36 | Haskell side. |
|---|
| 37 | */ |
|---|
| 38 | |
|---|
| 39 | #include "PosixSource.h" |
|---|
| 40 | #include "Rts.h" |
|---|
| 41 | |
|---|
| 42 | #include "RtsUtils.h" |
|---|
| 43 | #include "Stable.h" |
|---|
| 44 | |
|---|
| 45 | #if defined(USE_LIBFFI_FOR_ADJUSTORS) |
|---|
| 46 | #include "ffi.h" |
|---|
| 47 | #include <string.h> |
|---|
| 48 | #endif |
|---|
| 49 | |
|---|
| 50 | #if defined(i386_HOST_ARCH) |
|---|
| 51 | extern void adjustorCode(void); |
|---|
| 52 | #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) |
|---|
| 53 | // from AdjustorAsm.s |
|---|
| 54 | // not declared as a function so that AIX-style |
|---|
| 55 | // fundescs can never get in the way. |
|---|
| 56 | extern void *adjustorCode; |
|---|
| 57 | #endif |
|---|
| 58 | |
|---|
| 59 | #if defined(USE_LIBFFI_FOR_ADJUSTORS) |
|---|
| 60 | void |
|---|
| 61 | freeHaskellFunctionPtr(void* ptr) |
|---|
| 62 | { |
|---|
| 63 | ffi_closure *cl; |
|---|
| 64 | |
|---|
| 65 | cl = (ffi_closure*)ptr; |
|---|
| 66 | freeStablePtr(cl->user_data); |
|---|
| 67 | stgFree(cl->cif->arg_types); |
|---|
| 68 | stgFree(cl->cif); |
|---|
| 69 | freeExec(cl); |
|---|
| 70 | } |
|---|
| 71 | |
|---|
| 72 | static ffi_type * char_to_ffi_type(char c) |
|---|
| 73 | { |
|---|
| 74 | switch (c) { |
|---|
| 75 | case 'v': return &ffi_type_void; |
|---|
| 76 | case 'f': return &ffi_type_float; |
|---|
| 77 | case 'd': return &ffi_type_double; |
|---|
| 78 | case 'L': return &ffi_type_sint64; |
|---|
| 79 | case 'l': return &ffi_type_uint64; |
|---|
| 80 | case 'W': return &ffi_type_sint32; |
|---|
| 81 | case 'w': return &ffi_type_uint32; |
|---|
| 82 | case 'S': return &ffi_type_sint16; |
|---|
| 83 | case 's': return &ffi_type_uint16; |
|---|
| 84 | case 'B': return &ffi_type_sint8; |
|---|
| 85 | case 'b': return &ffi_type_uint8; |
|---|
| 86 | case 'p': return &ffi_type_pointer; |
|---|
| 87 | default: barf("char_to_ffi_type: unknown type '%c'", c); |
|---|
| 88 | } |
|---|
| 89 | } |
|---|
| 90 | |
|---|
| 91 | void* |
|---|
| 92 | createAdjustor (int cconv, |
|---|
| 93 | StgStablePtr hptr, |
|---|
| 94 | StgFunPtr wptr, |
|---|
| 95 | char *typeString) |
|---|
| 96 | { |
|---|
| 97 | ffi_cif *cif; |
|---|
| 98 | ffi_type **arg_types; |
|---|
| 99 | nat n_args, i; |
|---|
| 100 | ffi_type *result_type; |
|---|
| 101 | ffi_closure *cl; |
|---|
| 102 | int r, abi; |
|---|
| 103 | void *code; |
|---|
| 104 | |
|---|
| 105 | n_args = strlen(typeString) - 1; |
|---|
| 106 | cif = stgMallocBytes(sizeof(ffi_cif), "createAdjustor"); |
|---|
| 107 | arg_types = stgMallocBytes(n_args * sizeof(ffi_type*), "createAdjustor"); |
|---|
| 108 | |
|---|
| 109 | result_type = char_to_ffi_type(typeString[0]); |
|---|
| 110 | for (i=0; i < n_args; i++) { |
|---|
| 111 | arg_types[i] = char_to_ffi_type(typeString[i+1]); |
|---|
| 112 | } |
|---|
| 113 | switch (cconv) { |
|---|
| 114 | #if defined(mingw32_HOST_OS) && defined(i386_HOST_ARCH) |
|---|
| 115 | case 0: /* stdcall */ |
|---|
| 116 | abi = FFI_STDCALL; |
|---|
| 117 | break; |
|---|
| 118 | #endif |
|---|
| 119 | case 1: /* ccall */ |
|---|
| 120 | abi = FFI_DEFAULT_ABI; |
|---|
| 121 | break; |
|---|
| 122 | default: |
|---|
| 123 | barf("createAdjustor: convention %d not supported on this platform", cconv); |
|---|
| 124 | } |
|---|
| 125 | |
|---|
| 126 | r = ffi_prep_cif(cif, abi, n_args, result_type, arg_types); |
|---|
| 127 | if (r != FFI_OK) barf("ffi_prep_cif failed: %d", r); |
|---|
| 128 | |
|---|
| 129 | cl = allocateExec(sizeof(ffi_closure), &code); |
|---|
| 130 | if (cl == NULL) { |
|---|
| 131 | barf("createAdjustor: failed to allocate memory"); |
|---|
| 132 | } |
|---|
| 133 | |
|---|
| 134 | r = ffi_prep_closure(cl, cif, (void*)wptr, hptr/*userdata*/); |
|---|
| 135 | if (r != FFI_OK) barf("ffi_prep_closure failed: %d", r); |
|---|
| 136 | |
|---|
| 137 | return (void*)code; |
|---|
| 138 | } |
|---|
| 139 | |
|---|
| 140 | #else // To end of file... |
|---|
| 141 | |
|---|
| 142 | #if defined(_WIN32) |
|---|
| 143 | #include <windows.h> |
|---|
| 144 | #endif |
|---|
| 145 | |
|---|
| 146 | #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS) |
|---|
| 147 | #include <string.h> |
|---|
| 148 | #endif |
|---|
| 149 | |
|---|
| 150 | #ifdef LEADING_UNDERSCORE |
|---|
| 151 | #define UNDERSCORE "_" |
|---|
| 152 | #else |
|---|
| 153 | #define UNDERSCORE "" |
|---|
| 154 | #endif |
|---|
| 155 | |
|---|
| 156 | #if defined(x86_64_HOST_ARCH) |
|---|
| 157 | /* |
|---|
| 158 | Now here's something obscure for you: |
|---|
| 159 | |
|---|
| 160 | When generating an adjustor thunk that uses the C calling |
|---|
| 161 | convention, we have to make sure that the thunk kicks off |
|---|
| 162 | the process of jumping into Haskell with a tail jump. Why? |
|---|
| 163 | Because as a result of jumping in into Haskell we may end |
|---|
| 164 | up freeing the very adjustor thunk we came from using |
|---|
| 165 | freeHaskellFunctionPtr(). Hence, we better not return to |
|---|
| 166 | the adjustor code on our way out, since it could by then |
|---|
| 167 | point to junk. |
|---|
| 168 | |
|---|
| 169 | The fix is readily at hand, just include the opcodes |
|---|
| 170 | for the C stack fixup code that we need to perform when |
|---|
| 171 | returning in some static piece of memory and arrange |
|---|
| 172 | to return to it before tail jumping from the adjustor thunk. |
|---|
| 173 | */ |
|---|
| 174 | static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void) |
|---|
| 175 | { |
|---|
| 176 | __asm__ ( |
|---|
| 177 | ".globl " UNDERSCORE "obscure_ccall_ret_code\n" |
|---|
| 178 | UNDERSCORE "obscure_ccall_ret_code:\n\t" |
|---|
| 179 | "addq $0x8, %rsp\n\t" |
|---|
| 180 | #if defined(mingw32_HOST_OS) |
|---|
| 181 | /* On Win64, we had to put the original return address after the |
|---|
| 182 | arg 1-4 spill slots, ro now we have to move it back */ |
|---|
| 183 | "movq 0x20(%rsp), %rcx\n" |
|---|
| 184 | "movq %rcx, (%rsp)\n" |
|---|
| 185 | #endif |
|---|
| 186 | "ret" |
|---|
| 187 | ); |
|---|
| 188 | } |
|---|
| 189 | extern void obscure_ccall_ret_code(void); |
|---|
| 190 | #endif |
|---|
| 191 | |
|---|
| 192 | #if defined(alpha_HOST_ARCH) |
|---|
| 193 | /* To get the definition of PAL_imb: */ |
|---|
| 194 | # if defined(linux_HOST_OS) |
|---|
| 195 | # include <asm/pal.h> |
|---|
| 196 | # else |
|---|
| 197 | # include <machine/pal.h> |
|---|
| 198 | # endif |
|---|
| 199 | #endif |
|---|
| 200 | |
|---|
| 201 | #if defined(ia64_HOST_ARCH) |
|---|
| 202 | |
|---|
| 203 | /* Layout of a function descriptor */ |
|---|
| 204 | typedef struct _IA64FunDesc { |
|---|
| 205 | StgWord64 ip; |
|---|
| 206 | StgWord64 gp; |
|---|
| 207 | } IA64FunDesc; |
|---|
| 208 | |
|---|
| 209 | static void * |
|---|
| 210 | stgAllocStable(size_t size_in_bytes, StgStablePtr *stable) |
|---|
| 211 | { |
|---|
| 212 | StgArrWords* arr; |
|---|
| 213 | nat data_size_in_words, total_size_in_words; |
|---|
| 214 | |
|---|
| 215 | /* round up to a whole number of words */ |
|---|
| 216 | data_size_in_words = ROUNDUP_BYTES_TO_WDS(size_in_bytes); |
|---|
| 217 | total_size_in_words = sizeofW(StgArrWords) + data_size_in_words; |
|---|
| 218 | |
|---|
| 219 | /* allocate and fill it in */ |
|---|
| 220 | arr = (StgArrWords *)allocate(total_size_in_words); |
|---|
| 221 | SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, size_in_bytes); |
|---|
| 222 | |
|---|
| 223 | /* obtain a stable ptr */ |
|---|
| 224 | *stable = getStablePtr((StgPtr)arr); |
|---|
| 225 | |
|---|
| 226 | /* and return a ptr to the goods inside the array */ |
|---|
| 227 | return(&(arr->payload)); |
|---|
| 228 | } |
|---|
| 229 | #endif |
|---|
| 230 | |
|---|
| 231 | #if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS) |
|---|
| 232 | __asm__("obscure_ccall_ret_code:\n\t" |
|---|
| 233 | "lwz 1,0(1)\n\t" |
|---|
| 234 | "lwz 0,4(1)\n\t" |
|---|
| 235 | "mtlr 0\n\t" |
|---|
| 236 | "blr"); |
|---|
| 237 | extern void obscure_ccall_ret_code(void); |
|---|
| 238 | #endif |
|---|
| 239 | |
|---|
| 240 | #if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) |
|---|
| 241 | #if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)) |
|---|
| 242 | |
|---|
| 243 | /* !!! !!! WARNING: !!! !!! |
|---|
| 244 | * This structure is accessed from AdjustorAsm.s |
|---|
| 245 | * Any changes here have to be mirrored in the offsets there. |
|---|
| 246 | */ |
|---|
| 247 | |
|---|
| 248 | typedef struct AdjustorStub { |
|---|
| 249 | #if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS) |
|---|
| 250 | unsigned lis; |
|---|
| 251 | unsigned ori; |
|---|
| 252 | unsigned lwz; |
|---|
| 253 | unsigned mtctr; |
|---|
| 254 | unsigned bctr; |
|---|
| 255 | StgFunPtr code; |
|---|
| 256 | #elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS) |
|---|
| 257 | /* powerpc64-darwin: just guessing that it won't use fundescs. */ |
|---|
| 258 | unsigned lis; |
|---|
| 259 | unsigned ori; |
|---|
| 260 | unsigned rldimi; |
|---|
| 261 | unsigned oris; |
|---|
| 262 | unsigned ori2; |
|---|
| 263 | unsigned lwz; |
|---|
| 264 | unsigned mtctr; |
|---|
| 265 | unsigned bctr; |
|---|
| 266 | StgFunPtr code; |
|---|
| 267 | #else |
|---|
| 268 | /* fundesc-based ABIs */ |
|---|
| 269 | #define FUNDESCS |
|---|
| 270 | StgFunPtr code; |
|---|
| 271 | struct AdjustorStub |
|---|
| 272 | *toc; |
|---|
| 273 | void *env; |
|---|
| 274 | #endif |
|---|
| 275 | StgStablePtr hptr; |
|---|
| 276 | StgFunPtr wptr; |
|---|
| 277 | StgInt negative_framesize; |
|---|
| 278 | StgInt extrawords_plus_one; |
|---|
| 279 | } AdjustorStub; |
|---|
| 280 | |
|---|
| 281 | #endif |
|---|
| 282 | #endif |
|---|
| 283 | |
|---|
| 284 | #if defined(i386_HOST_ARCH) |
|---|
| 285 | |
|---|
| 286 | /* !!! !!! WARNING: !!! !!! |
|---|
| 287 | * This structure is accessed from AdjustorAsm.s |
|---|
| 288 | * Any changes here have to be mirrored in the offsets there. |
|---|
| 289 | */ |
|---|
| 290 | |
|---|
| 291 | typedef struct AdjustorStub { |
|---|
| 292 | unsigned char call[8]; |
|---|
| 293 | StgStablePtr hptr; |
|---|
| 294 | StgFunPtr wptr; |
|---|
| 295 | StgInt frame_size; |
|---|
| 296 | StgInt argument_size; |
|---|
| 297 | } AdjustorStub; |
|---|
| 298 | #endif |
|---|
| 299 | |
|---|
| 300 | #if defined(i386_HOST_ARCH) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) |
|---|
| 301 | static int totalArgumentSize(char *typeString) |
|---|
| 302 | { |
|---|
| 303 | int sz = 0; |
|---|
| 304 | while(*typeString) |
|---|
| 305 | { |
|---|
| 306 | char t = *typeString++; |
|---|
| 307 | |
|---|
| 308 | switch(t) |
|---|
| 309 | { |
|---|
| 310 | // on 32-bit platforms, Double and Int64 occupy two words. |
|---|
| 311 | case 'd': |
|---|
| 312 | case 'l': |
|---|
| 313 | case 'L': |
|---|
| 314 | if(sizeof(void*) == 4) |
|---|
| 315 | { |
|---|
| 316 | sz += 2; |
|---|
| 317 | break; |
|---|
| 318 | } |
|---|
| 319 | // everything else is one word. |
|---|
| 320 | default: |
|---|
| 321 | sz += 1; |
|---|
| 322 | } |
|---|
| 323 | } |
|---|
| 324 | return sz; |
|---|
| 325 | } |
|---|
| 326 | #endif |
|---|
| 327 | |
|---|
| 328 | void* |
|---|
| 329 | createAdjustor(int cconv, StgStablePtr hptr, |
|---|
| 330 | StgFunPtr wptr, |
|---|
| 331 | char *typeString |
|---|
| 332 | #if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH) |
|---|
| 333 | STG_UNUSED |
|---|
| 334 | #endif |
|---|
| 335 | ) |
|---|
| 336 | { |
|---|
| 337 | void *adjustor = NULL; |
|---|
| 338 | void *code; |
|---|
| 339 | |
|---|
| 340 | switch (cconv) |
|---|
| 341 | { |
|---|
| 342 | case 0: /* _stdcall */ |
|---|
| 343 | #if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS) |
|---|
| 344 | /* Magic constant computed by inspecting the code length of |
|---|
| 345 | the following assembly language snippet |
|---|
| 346 | (offset and machine code prefixed): |
|---|
| 347 | |
|---|
| 348 | <0>: 58 popl %eax # temp. remove ret addr.. |
|---|
| 349 | <1>: 68 fd fc fe fa pushl 0xfafefcfd # constant is large enough to |
|---|
| 350 | # hold a StgStablePtr |
|---|
| 351 | <6>: 50 pushl %eax # put back ret. addr |
|---|
| 352 | <7>: b8 fa ef ff 00 movl $0x00ffeffa, %eax # load up wptr |
|---|
| 353 | <c>: ff e0 jmp %eax # and jump to it. |
|---|
| 354 | # the callee cleans up the stack |
|---|
| 355 | */ |
|---|
| 356 | adjustor = allocateExec(14,&code); |
|---|
| 357 | { |
|---|
| 358 | unsigned char *const adj_code = (unsigned char *)adjustor; |
|---|
| 359 | adj_code[0x00] = (unsigned char)0x58; /* popl %eax */ |
|---|
| 360 | |
|---|
| 361 | adj_code[0x01] = (unsigned char)0x68; /* pushl hptr (which is a dword immediate ) */ |
|---|
| 362 | *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr; |
|---|
| 363 | |
|---|
| 364 | adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */ |
|---|
| 365 | |
|---|
| 366 | adj_code[0x07] = (unsigned char)0xb8; /* movl $wptr, %eax */ |
|---|
| 367 | *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr; |
|---|
| 368 | |
|---|
| 369 | adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */ |
|---|
| 370 | adj_code[0x0d] = (unsigned char)0xe0; |
|---|
| 371 | } |
|---|
| 372 | #endif |
|---|
| 373 | break; |
|---|
| 374 | |
|---|
| 375 | case 1: /* _ccall */ |
|---|
| 376 | #if defined(i386_HOST_ARCH) |
|---|
| 377 | { |
|---|
| 378 | /* |
|---|
| 379 | Most of the trickiness here is due to the need to keep the |
|---|
| 380 | stack pointer 16-byte aligned (see #5250). That means we |
|---|
| 381 | can't just push another argument on the stack and call the |
|---|
| 382 | wrapper, we may have to shuffle the whole argument block. |
|---|
| 383 | |
|---|
| 384 | We offload most of the work to AdjustorAsm.S. |
|---|
| 385 | */ |
|---|
| 386 | AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub),&code); |
|---|
| 387 | adjustor = adjustorStub; |
|---|
| 388 | |
|---|
| 389 | int sz = totalArgumentSize(typeString); |
|---|
| 390 | |
|---|
| 391 | adjustorStub->call[0] = 0xe8; |
|---|
| 392 | *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5); |
|---|
| 393 | adjustorStub->hptr = hptr; |
|---|
| 394 | adjustorStub->wptr = wptr; |
|---|
| 395 | |
|---|
| 396 | // The adjustor puts the following things on the stack: |
|---|
| 397 | // 1.) %ebp link |
|---|
| 398 | // 2.) padding and (a copy of) the arguments |
|---|
| 399 | // 3.) a dummy argument |
|---|
| 400 | // 4.) hptr |
|---|
| 401 | // 5.) return address (for returning to the adjustor) |
|---|
| 402 | // All these have to add up to a multiple of 16. |
|---|
| 403 | |
|---|
| 404 | // first, include everything in frame_size |
|---|
| 405 | adjustorStub->frame_size = sz * 4 + 16; |
|---|
| 406 | // align to 16 bytes |
|---|
| 407 | adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15; |
|---|
| 408 | // only count 2.) and 3.) as part of frame_size |
|---|
| 409 | adjustorStub->frame_size -= 12; |
|---|
| 410 | adjustorStub->argument_size = sz; |
|---|
| 411 | } |
|---|
| 412 | |
|---|
| 413 | #elif defined(x86_64_HOST_ARCH) |
|---|
| 414 | |
|---|
| 415 | # if defined(mingw32_HOST_OS) |
|---|
| 416 | /* |
|---|
| 417 | stack at call: |
|---|
| 418 | argn |
|---|
| 419 | ... |
|---|
| 420 | arg5 |
|---|
| 421 | return address |
|---|
| 422 | %rcx,%rdx,%r8,%r9 = arg1..arg4 |
|---|
| 423 | |
|---|
| 424 | if there are <4 integer args, then we can just push the |
|---|
| 425 | StablePtr into %rcx and shuffle the other args up. |
|---|
| 426 | |
|---|
| 427 | If there are >=4 integer args, then we have to flush one arg |
|---|
| 428 | to the stack, and arrange to adjust the stack ptr on return. |
|---|
| 429 | The stack will be rearranged to this: |
|---|
| 430 | |
|---|
| 431 | argn |
|---|
| 432 | ... |
|---|
| 433 | arg5 |
|---|
| 434 | return address *** <-- dummy arg in stub fn. |
|---|
| 435 | arg4 |
|---|
| 436 | obscure_ccall_ret_code |
|---|
| 437 | |
|---|
| 438 | This unfortunately means that the type of the stub function |
|---|
| 439 | must have a dummy argument for the original return address |
|---|
| 440 | pointer inserted just after the 4th integer argument. |
|---|
| 441 | |
|---|
| 442 | Code for the simple case: |
|---|
| 443 | |
|---|
| 444 | 0: 4d 89 c1 mov %r8,%r9 |
|---|
| 445 | 3: 49 89 d0 mov %rdx,%r8 |
|---|
| 446 | 6: 48 89 ca mov %rcx,%rdx |
|---|
| 447 | 9: f2 0f 10 da movsd %xmm2,%xmm3 |
|---|
| 448 | d: f2 0f 10 d1 movsd %xmm1,%xmm2 |
|---|
| 449 | 11: f2 0f 10 c8 movsd %xmm0,%xmm1 |
|---|
| 450 | 15: 48 8b 0d 0c 00 00 00 mov 0xc(%rip),%rcx # 28 <.text+0x28> |
|---|
| 451 | 1c: ff 25 0e 00 00 00 jmpq *0xe(%rip) # 30 <.text+0x30> |
|---|
| 452 | 22: 90 nop |
|---|
| 453 | [...] |
|---|
| 454 | |
|---|
| 455 | |
|---|
| 456 | And the version for >=4 integer arguments: |
|---|
| 457 | |
|---|
| 458 | [we want to push the 4th argument (either %r9 or %xmm3, depending on |
|---|
| 459 | whether it is a floating arg or not) and the return address onto the |
|---|
| 460 | stack. However, slots 1-4 are reserved for code we call to spill its |
|---|
| 461 | args 1-4 into, so we can't just push them onto the bottom of the stack. |
|---|
| 462 | So first put the 4th argument onto the stack, above what will be the |
|---|
| 463 | spill slots.] |
|---|
| 464 | 0: 48 83 ec 08 sub $0x8,%rsp |
|---|
| 465 | [if non-floating arg, then do this:] |
|---|
| 466 | 4: 90 nop |
|---|
| 467 | 5: 4c 89 4c 24 20 mov %r9,0x20(%rsp) |
|---|
| 468 | [else if floating arg then do this:] |
|---|
| 469 | 4: f2 0f 11 5c 24 20 movsd %xmm3,0x20(%rsp) |
|---|
| 470 | [end if] |
|---|
| 471 | [Now push the new return address onto the stack] |
|---|
| 472 | a: ff 35 30 00 00 00 pushq 0x30(%rip) # 40 <.text+0x40> |
|---|
| 473 | [But the old return address has been moved up into a spill slot, so |
|---|
| 474 | we need to move it above them] |
|---|
| 475 | 10: 4c 8b 4c 24 10 mov 0x10(%rsp),%r9 |
|---|
| 476 | 15: 4c 89 4c 24 30 mov %r9,0x30(%rsp) |
|---|
| 477 | [Now we do the normal register shuffle-up etc] |
|---|
| 478 | 1a: 4d 89 c1 mov %r8,%r9 |
|---|
| 479 | 1d: 49 89 d0 mov %rdx,%r8 |
|---|
| 480 | 20: 48 89 ca mov %rcx,%rdx |
|---|
| 481 | 23: f2 0f 10 da movsd %xmm2,%xmm3 |
|---|
| 482 | 27: f2 0f 10 d1 movsd %xmm1,%xmm2 |
|---|
| 483 | 2b: f2 0f 10 c8 movsd %xmm0,%xmm1 |
|---|
| 484 | 2f: 48 8b 0d 12 00 00 00 mov 0x12(%rip),%rcx # 48 <.text+0x48> |
|---|
| 485 | 36: ff 25 14 00 00 00 jmpq *0x14(%rip) # 50 <.text+0x50> |
|---|
| 486 | 3c: 90 nop |
|---|
| 487 | 3d: 90 nop |
|---|
| 488 | 3e: 90 nop |
|---|
| 489 | 3f: 90 nop |
|---|
| 490 | [...] |
|---|
| 491 | |
|---|
| 492 | */ |
|---|
| 493 | { |
|---|
| 494 | int i = 0; |
|---|
| 495 | int fourthFloating; |
|---|
| 496 | char *c; |
|---|
| 497 | StgWord8 *adj_code; |
|---|
| 498 | |
|---|
| 499 | // determine whether we have 4 or more integer arguments, |
|---|
| 500 | // and therefore need to flush one to the stack. |
|---|
| 501 | for (c = typeString; *c != '\0'; c++) { |
|---|
| 502 | i++; |
|---|
| 503 | if (i == 4) { |
|---|
| 504 | fourthFloating = (*c == 'f' || *c == 'd'); |
|---|
| 505 | break; |
|---|
| 506 | } |
|---|
| 507 | } |
|---|
| 508 | |
|---|
| 509 | if (i < 4) { |
|---|
| 510 | adjustor = allocateExec(0x38,&code); |
|---|
| 511 | adj_code = (StgWord8*)adjustor; |
|---|
| 512 | |
|---|
| 513 | *(StgInt32 *)adj_code = 0x49c1894d; |
|---|
| 514 | *(StgInt32 *)(adj_code+0x4) = 0x8948d089; |
|---|
| 515 | *(StgInt32 *)(adj_code+0x8) = 0x100ff2ca; |
|---|
| 516 | *(StgInt32 *)(adj_code+0xc) = 0x100ff2da; |
|---|
| 517 | *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1; |
|---|
| 518 | *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8; |
|---|
| 519 | *(StgInt32 *)(adj_code+0x18) = 0x0000000c; |
|---|
| 520 | |
|---|
| 521 | *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff; |
|---|
| 522 | *(StgInt32 *)(adj_code+0x20) = 0x00000000; |
|---|
| 523 | *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr; |
|---|
| 524 | *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr; |
|---|
| 525 | } |
|---|
| 526 | else |
|---|
| 527 | { |
|---|
| 528 | adjustor = allocateExec(0x58,&code); |
|---|
| 529 | adj_code = (StgWord8*)adjustor; |
|---|
| 530 | *(StgInt32 *)adj_code = 0x08ec8348; |
|---|
| 531 | *(StgInt32 *)(adj_code+0x4) = fourthFloating ? 0x5c110ff2 |
|---|
| 532 | : 0x4c894c90; |
|---|
| 533 | *(StgInt32 *)(adj_code+0x8) = 0x35ff2024; |
|---|
| 534 | *(StgInt32 *)(adj_code+0xc) = 0x00000030; |
|---|
| 535 | *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c; |
|---|
| 536 | *(StgInt32 *)(adj_code+0x14) = 0x4c894c10; |
|---|
| 537 | *(StgInt32 *)(adj_code+0x18) = 0x894d3024; |
|---|
| 538 | *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1; |
|---|
| 539 | *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948; |
|---|
| 540 | *(StgInt32 *)(adj_code+0x24) = 0xf2da100f; |
|---|
| 541 | *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f; |
|---|
| 542 | *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f; |
|---|
| 543 | *(StgInt32 *)(adj_code+0x30) = 0x00120d8b; |
|---|
| 544 | *(StgInt32 *)(adj_code+0x34) = 0x25ff0000; |
|---|
| 545 | *(StgInt32 *)(adj_code+0x38) = 0x00000014; |
|---|
| 546 | *(StgInt32 *)(adj_code+0x3c) = 0x90909090; |
|---|
| 547 | *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code; |
|---|
| 548 | *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr; |
|---|
| 549 | *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr; |
|---|
| 550 | } |
|---|
| 551 | } |
|---|
| 552 | |
|---|
| 553 | # else |
|---|
| 554 | /* |
|---|
| 555 | stack at call: |
|---|
| 556 | argn |
|---|
| 557 | ... |
|---|
| 558 | arg7 |
|---|
| 559 | return address |
|---|
| 560 | %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg1..arg6 |
|---|
| 561 | |
|---|
| 562 | if there are <6 integer args, then we can just push the |
|---|
| 563 | StablePtr into %edi and shuffle the other args up. |
|---|
| 564 | |
|---|
| 565 | If there are >=6 integer args, then we have to flush one arg |
|---|
| 566 | to the stack, and arrange to adjust the stack ptr on return. |
|---|
| 567 | The stack will be rearranged to this: |
|---|
| 568 | |
|---|
| 569 | argn |
|---|
| 570 | ... |
|---|
| 571 | arg7 |
|---|
| 572 | return address *** <-- dummy arg in stub fn. |
|---|
| 573 | arg6 |
|---|
| 574 | obscure_ccall_ret_code |
|---|
| 575 | |
|---|
| 576 | This unfortunately means that the type of the stub function |
|---|
| 577 | must have a dummy argument for the original return address |
|---|
| 578 | pointer inserted just after the 6th integer argument. |
|---|
| 579 | |
|---|
| 580 | Code for the simple case: |
|---|
| 581 | |
|---|
| 582 | 0: 4d 89 c1 mov %r8,%r9 |
|---|
| 583 | 3: 49 89 c8 mov %rcx,%r8 |
|---|
| 584 | 6: 48 89 d1 mov %rdx,%rcx |
|---|
| 585 | 9: 48 89 f2 mov %rsi,%rdx |
|---|
| 586 | c: 48 89 fe mov %rdi,%rsi |
|---|
| 587 | f: 48 8b 3d 0a 00 00 00 mov 10(%rip),%rdi |
|---|
| 588 | 16: ff 25 0c 00 00 00 jmpq *12(%rip) |
|---|
| 589 | ... |
|---|
| 590 | 20: .quad 0 # aligned on 8-byte boundary |
|---|
| 591 | 28: .quad 0 # aligned on 8-byte boundary |
|---|
| 592 | |
|---|
| 593 | |
|---|
| 594 | And the version for >=6 integer arguments: |
|---|
| 595 | |
|---|
| 596 | 0: 41 51 push %r9 |
|---|
| 597 | 2: ff 35 20 00 00 00 pushq 32(%rip) # 28 <ccall_adjustor+0x28> |
|---|
| 598 | 8: 4d 89 c1 mov %r8,%r9 |
|---|
| 599 | b: 49 89 c8 mov %rcx,%r8 |
|---|
| 600 | e: 48 89 d1 mov %rdx,%rcx |
|---|
| 601 | 11: 48 89 f2 mov %rsi,%rdx |
|---|
| 602 | 14: 48 89 fe mov %rdi,%rsi |
|---|
| 603 | 17: 48 8b 3d 12 00 00 00 mov 18(%rip),%rdi # 30 <ccall_adjustor+0x30> |
|---|
| 604 | 1e: ff 25 14 00 00 00 jmpq *20(%rip) # 38 <ccall_adjustor+0x38> |
|---|
| 605 | ... |
|---|
| 606 | 28: .quad 0 # aligned on 8-byte boundary |
|---|
| 607 | 30: .quad 0 # aligned on 8-byte boundary |
|---|
| 608 | 38: .quad 0 # aligned on 8-byte boundary |
|---|
| 609 | */ |
|---|
| 610 | |
|---|
| 611 | { |
|---|
| 612 | int i = 0; |
|---|
| 613 | char *c; |
|---|
| 614 | StgWord8 *adj_code; |
|---|
| 615 | |
|---|
| 616 | // determine whether we have 6 or more integer arguments, |
|---|
| 617 | // and therefore need to flush one to the stack. |
|---|
| 618 | for (c = typeString; *c != '\0'; c++) { |
|---|
| 619 | if (*c != 'f' && *c != 'd') i++; |
|---|
| 620 | if (i == 6) break; |
|---|
| 621 | } |
|---|
| 622 | |
|---|
| 623 | if (i < 6) { |
|---|
| 624 | adjustor = allocateExec(0x30,&code); |
|---|
| 625 | adj_code = (StgWord8*)adjustor; |
|---|
| 626 | |
|---|
| 627 | *(StgInt32 *)adj_code = 0x49c1894d; |
|---|
| 628 | *(StgInt32 *)(adj_code+0x4) = 0x8948c889; |
|---|
| 629 | *(StgInt32 *)(adj_code+0x8) = 0xf28948d1; |
|---|
| 630 | *(StgInt32 *)(adj_code+0xc) = 0x48fe8948; |
|---|
| 631 | *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b; |
|---|
| 632 | *(StgInt32 *)(adj_code+0x14) = 0x25ff0000; |
|---|
| 633 | *(StgInt32 *)(adj_code+0x18) = 0x0000000c; |
|---|
| 634 | *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr; |
|---|
| 635 | *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr; |
|---|
| 636 | } |
|---|
| 637 | else |
|---|
| 638 | { |
|---|
| 639 | adjustor = allocateExec(0x40,&code); |
|---|
| 640 | adj_code = (StgWord8*)adjustor; |
|---|
| 641 | |
|---|
| 642 | *(StgInt32 *)adj_code = 0x35ff5141; |
|---|
| 643 | *(StgInt32 *)(adj_code+0x4) = 0x00000020; |
|---|
| 644 | *(StgInt32 *)(adj_code+0x8) = 0x49c1894d; |
|---|
| 645 | *(StgInt32 *)(adj_code+0xc) = 0x8948c889; |
|---|
| 646 | *(StgInt32 *)(adj_code+0x10) = 0xf28948d1; |
|---|
| 647 | *(StgInt32 *)(adj_code+0x14) = 0x48fe8948; |
|---|
| 648 | *(StgInt32 *)(adj_code+0x18) = 0x00123d8b; |
|---|
| 649 | *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000; |
|---|
| 650 | *(StgInt32 *)(adj_code+0x20) = 0x00000014; |
|---|
| 651 | |
|---|
| 652 | *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code; |
|---|
| 653 | *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr; |
|---|
| 654 | *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr; |
|---|
| 655 | } |
|---|
| 656 | } |
|---|
| 657 | # endif |
|---|
| 658 | |
|---|
| 659 | |
|---|
| 660 | #elif defined(sparc_HOST_ARCH) |
|---|
| 661 | /* Magic constant computed by inspecting the code length of the following |
|---|
| 662 | assembly language snippet (offset and machine code prefixed): |
|---|
| 663 | |
|---|
| 664 | <00>: 9C23A008 sub %sp, 8, %sp ! make room for %o4/%o5 in caller's frame |
|---|
| 665 | <04>: DA23A060 st %o5, [%sp + 96] ! shift registers by 2 positions |
|---|
| 666 | <08>: D823A05C st %o4, [%sp + 92] |
|---|
| 667 | <0C>: 9A10000B mov %o3, %o5 |
|---|
| 668 | <10>: 9810000A mov %o2, %o4 |
|---|
| 669 | <14>: 96100009 mov %o1, %o3 |
|---|
| 670 | <18>: 94100008 mov %o0, %o2 |
|---|
| 671 | <1C>: 13000000 sethi %hi(wptr), %o1 ! load up wptr (1 of 2) |
|---|
| 672 | <20>: 11000000 sethi %hi(hptr), %o0 ! load up hptr (1 of 2) |
|---|
| 673 | <24>: 81C26000 jmp %o1 + %lo(wptr) ! jump to wptr (load 2 of 2) |
|---|
| 674 | <28>: 90122000 or %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot) |
|---|
| 675 | <2C> 00000000 ! place for getting hptr back easily |
|---|
| 676 | |
|---|
| 677 | ccall'ing on SPARC is easy, because we are quite lucky to push a |
|---|
| 678 | multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the |
|---|
| 679 | existing arguments (note that %sp must stay double-word aligned at |
|---|
| 680 | all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf). |
|---|
| 681 | To do this, we extend the *caller's* stack frame by 2 words and shift |
|---|
| 682 | the output registers used for argument passing (%o0 - %o5, we are a *leaf* |
|---|
| 683 | procedure because of the tail-jump) by 2 positions. This makes room in |
|---|
| 684 | %o0 and %o1 for the additinal arguments, namely hptr and a dummy (used |
|---|
| 685 | for destination addr of jump on SPARC, return address on x86, ...). This |
|---|
| 686 | shouldn't cause any problems for a C-like caller: alloca is implemented |
|---|
| 687 | similarly, and local variables should be accessed via %fp, not %sp. In a |
|---|
| 688 | nutshell: This should work! (Famous last words! :-) |
|---|
| 689 | */ |
|---|
| 690 | adjustor = allocateExec(4*(11+1),&code); |
|---|
| 691 | { |
|---|
| 692 | unsigned long *const adj_code = (unsigned long *)adjustor; |
|---|
| 693 | |
|---|
| 694 | adj_code[ 0] = 0x9C23A008UL; /* sub %sp, 8, %sp */ |
|---|
| 695 | adj_code[ 1] = 0xDA23A060UL; /* st %o5, [%sp + 96] */ |
|---|
| 696 | adj_code[ 2] = 0xD823A05CUL; /* st %o4, [%sp + 92] */ |
|---|
| 697 | adj_code[ 3] = 0x9A10000BUL; /* mov %o3, %o5 */ |
|---|
| 698 | adj_code[ 4] = 0x9810000AUL; /* mov %o2, %o4 */ |
|---|
| 699 | adj_code[ 5] = 0x96100009UL; /* mov %o1, %o3 */ |
|---|
| 700 | adj_code[ 6] = 0x94100008UL; /* mov %o0, %o2 */ |
|---|
| 701 | adj_code[ 7] = 0x13000000UL; /* sethi %hi(wptr), %o1 */ |
|---|
| 702 | adj_code[ 7] |= ((unsigned long)wptr) >> 10; |
|---|
| 703 | adj_code[ 8] = 0x11000000UL; /* sethi %hi(hptr), %o0 */ |
|---|
| 704 | adj_code[ 8] |= ((unsigned long)hptr) >> 10; |
|---|
| 705 | adj_code[ 9] = 0x81C26000UL; /* jmp %o1 + %lo(wptr) */ |
|---|
| 706 | adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL; |
|---|
| 707 | adj_code[10] = 0x90122000UL; /* or %o0, %lo(hptr), %o0 */ |
|---|
| 708 | adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL; |
|---|
| 709 | |
|---|
| 710 | adj_code[11] = (unsigned long)hptr; |
|---|
| 711 | |
|---|
| 712 | /* flush cache */ |
|---|
| 713 | asm("flush %0" : : "r" (adj_code )); |
|---|
| 714 | asm("flush %0" : : "r" (adj_code + 2)); |
|---|
| 715 | asm("flush %0" : : "r" (adj_code + 4)); |
|---|
| 716 | asm("flush %0" : : "r" (adj_code + 6)); |
|---|
| 717 | asm("flush %0" : : "r" (adj_code + 10)); |
|---|
| 718 | |
|---|
| 719 | /* max. 5 instructions latency, and we need at >= 1 for returning */ |
|---|
| 720 | asm("nop"); |
|---|
| 721 | asm("nop"); |
|---|
| 722 | asm("nop"); |
|---|
| 723 | asm("nop"); |
|---|
| 724 | } |
|---|
| 725 | #elif defined(alpha_HOST_ARCH) |
|---|
| 726 | /* Magic constant computed by inspecting the code length of |
|---|
| 727 | the following assembly language snippet |
|---|
| 728 | (offset and machine code prefixed; note that the machine code |
|---|
| 729 | shown is longwords stored in little-endian order): |
|---|
| 730 | |
|---|
| 731 | <00>: 46520414 mov a2, a4 |
|---|
| 732 | <04>: 46100412 mov a0, a2 |
|---|
| 733 | <08>: a61b0020 ldq a0, 0x20(pv) # load up hptr |
|---|
| 734 | <0c>: 46730415 mov a3, a5 |
|---|
| 735 | <10>: a77b0028 ldq pv, 0x28(pv) # load up wptr |
|---|
| 736 | <14>: 46310413 mov a1, a3 |
|---|
| 737 | <18>: 6bfb---- jmp (pv), <hint> # jump to wptr (with hint) |
|---|
| 738 | <1c>: 00000000 # padding for alignment |
|---|
| 739 | <20>: [8 bytes for hptr quadword] |
|---|
| 740 | <28>: [8 bytes for wptr quadword] |
|---|
| 741 | |
|---|
| 742 | The "computed" jump at <08> above is really a jump to a fixed |
|---|
| 743 | location. Accordingly, we place an always-correct hint in the |
|---|
| 744 | jump instruction, namely the address offset from <0c> to wptr, |
|---|
| 745 | divided by 4, taking the lowest 14 bits. |
|---|
| 746 | |
|---|
| 747 | We only support passing 4 or fewer argument words, for the same |
|---|
| 748 | reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01. |
|---|
| 749 | On the Alpha the first 6 integer arguments are in a0 through a5, |
|---|
| 750 | and the rest on the stack. Hence we want to shuffle the original |
|---|
| 751 | caller's arguments by two. |
|---|
| 752 | |
|---|
| 753 | On the Alpha the calling convention is so complex and dependent |
|---|
| 754 | on the callee's signature -- for example, the stack pointer has |
|---|
| 755 | to be a multiple of 16 -- that it seems impossible to me [ccshan] |
|---|
| 756 | to handle the general case correctly without changing how the |
|---|
| 757 | adjustor is called from C. For now, our solution of shuffling |
|---|
| 758 | registers only and ignoring the stack only works if the original |
|---|
| 759 | caller passed 4 or fewer argument words. |
|---|
| 760 | |
|---|
| 761 | TODO: Depending on how much allocation overhead stgMallocBytes uses for |
|---|
| 762 | header information (more precisely, if the overhead is no more than |
|---|
| 763 | 4 bytes), we should move the first three instructions above down by |
|---|
| 764 | 4 bytes (getting rid of the nop), hence saving memory. [ccshan] |
|---|
| 765 | */ |
|---|
| 766 | ASSERT(((StgWord64)wptr & 3) == 0); |
|---|
| 767 | adjustor = allocateExec(48,&code); |
|---|
| 768 | { |
|---|
| 769 | StgWord64 *const code = (StgWord64 *)adjustor; |
|---|
| 770 | |
|---|
| 771 | code[0] = 0x4610041246520414L; |
|---|
| 772 | code[1] = 0x46730415a61b0020L; |
|---|
| 773 | code[2] = 0x46310413a77b0028L; |
|---|
| 774 | code[3] = 0x000000006bfb0000L |
|---|
| 775 | | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff); |
|---|
| 776 | |
|---|
| 777 | code[4] = (StgWord64)hptr; |
|---|
| 778 | code[5] = (StgWord64)wptr; |
|---|
| 779 | |
|---|
| 780 | /* Ensure that instruction cache is consistent with our new code */ |
|---|
| 781 | __asm__ volatile("call_pal %0" : : "i" (PAL_imb)); |
|---|
| 782 | } |
|---|
| 783 | #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS) |
|---|
| 784 | |
|---|
| 785 | #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF)) |
|---|
| 786 | #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16)) |
|---|
| 787 | { |
|---|
| 788 | /* The PowerPC Linux (32-bit) calling convention is annoyingly complex. |
|---|
| 789 | We need to calculate all the details of the stack frame layout, |
|---|
| 790 | taking into account the types of all the arguments, and then |
|---|
| 791 | generate code on the fly. */ |
|---|
| 792 | |
|---|
| 793 | int src_gpr = 3, dst_gpr = 5; |
|---|
| 794 | int fpr = 3; |
|---|
| 795 | int src_offset = 0, dst_offset = 0; |
|---|
| 796 | int n = strlen(typeString),i; |
|---|
| 797 | int src_locs[n], dst_locs[n]; |
|---|
| 798 | int frameSize; |
|---|
| 799 | unsigned *code; |
|---|
| 800 | |
|---|
| 801 | /* Step 1: |
|---|
| 802 | Calculate where the arguments should go. |
|---|
| 803 | src_locs[] will contain the locations of the arguments in the |
|---|
| 804 | original stack frame passed to the adjustor. |
|---|
| 805 | dst_locs[] will contain the locations of the arguments after the |
|---|
| 806 | adjustor runs, on entry to the wrapper proc pointed to by wptr. |
|---|
| 807 | |
|---|
| 808 | This algorithm is based on the one described on page 3-19 of the |
|---|
| 809 | System V ABI PowerPC Processor Supplement. |
|---|
| 810 | */ |
|---|
| 811 | for(i=0;typeString[i];i++) |
|---|
| 812 | { |
|---|
| 813 | char t = typeString[i]; |
|---|
| 814 | if((t == 'f' || t == 'd') && fpr <= 8) |
|---|
| 815 | src_locs[i] = dst_locs[i] = -32-(fpr++); |
|---|
| 816 | else |
|---|
| 817 | { |
|---|
| 818 | if((t == 'l' || t == 'L') && src_gpr <= 9) |
|---|
| 819 | { |
|---|
| 820 | if((src_gpr & 1) == 0) |
|---|
| 821 | src_gpr++; |
|---|
| 822 | src_locs[i] = -src_gpr; |
|---|
| 823 | src_gpr += 2; |
|---|
| 824 | } |
|---|
| 825 | else if((t == 'w' || t == 'W') && src_gpr <= 10) |
|---|
| 826 | { |
|---|
| 827 | src_locs[i] = -(src_gpr++); |
|---|
| 828 | } |
|---|
| 829 | else |
|---|
| 830 | { |
|---|
| 831 | if(t == 'l' || t == 'L' || t == 'd') |
|---|
| 832 | { |
|---|
| 833 | if(src_offset % 8) |
|---|
| 834 | src_offset += 4; |
|---|
| 835 | } |
|---|
| 836 | src_locs[i] = src_offset; |
|---|
| 837 | src_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4; |
|---|
| 838 | } |
|---|
| 839 | |
|---|
| 840 | if((t == 'l' || t == 'L') && dst_gpr <= 9) |
|---|
| 841 | { |
|---|
| 842 | if((dst_gpr & 1) == 0) |
|---|
| 843 | dst_gpr++; |
|---|
| 844 | dst_locs[i] = -dst_gpr; |
|---|
| 845 | dst_gpr += 2; |
|---|
| 846 | } |
|---|
| 847 | else if((t == 'w' || t == 'W') && dst_gpr <= 10) |
|---|
| 848 | { |
|---|
| 849 | dst_locs[i] = -(dst_gpr++); |
|---|
| 850 | } |
|---|
| 851 | else |
|---|
| 852 | { |
|---|
| 853 | if(t == 'l' || t == 'L' || t == 'd') |
|---|
| 854 | { |
|---|
| 855 | if(dst_offset % 8) |
|---|
| 856 | dst_offset += 4; |
|---|
| 857 | } |
|---|
| 858 | dst_locs[i] = dst_offset; |
|---|
| 859 | dst_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4; |
|---|
| 860 | } |
|---|
| 861 | } |
|---|
| 862 | } |
|---|
| 863 | |
|---|
| 864 | frameSize = dst_offset + 8; |
|---|
| 865 | frameSize = (frameSize+15) & ~0xF; |
|---|
| 866 | |
|---|
| 867 | /* Step 2: |
|---|
| 868 | Build the adjustor. |
|---|
| 869 | */ |
|---|
| 870 | // allocate space for at most 4 insns per parameter |
|---|
| 871 | // plus 14 more instructions. |
|---|
| 872 | adjustor = allocateExec(4 * (4*n + 14),&code); |
|---|
| 873 | code = (unsigned*)adjustor; |
|---|
| 874 | |
|---|
| 875 | *code++ = 0x48000008; // b *+8 |
|---|
| 876 | // * Put the hptr in a place where freeHaskellFunctionPtr |
|---|
| 877 | // can get at it. |
|---|
| 878 | *code++ = (unsigned) hptr; |
|---|
| 879 | |
|---|
| 880 | // * save the link register |
|---|
| 881 | *code++ = 0x7c0802a6; // mflr r0; |
|---|
| 882 | *code++ = 0x90010004; // stw r0, 4(r1); |
|---|
| 883 | // * and build a new stack frame |
|---|
| 884 | *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1) |
|---|
| 885 | |
|---|
| 886 | // * now generate instructions to copy arguments |
|---|
| 887 | // from the old stack frame into the new stack frame. |
|---|
| 888 | for(i=n-1;i>=0;i--) |
|---|
| 889 | { |
|---|
| 890 | if(src_locs[i] < -32) |
|---|
| 891 | ASSERT(dst_locs[i] == src_locs[i]); |
|---|
| 892 | else if(src_locs[i] < 0) |
|---|
| 893 | { |
|---|
| 894 | // source in GPR. |
|---|
| 895 | ASSERT(typeString[i] != 'f' && typeString[i] != 'd'); |
|---|
| 896 | if(dst_locs[i] < 0) |
|---|
| 897 | { |
|---|
| 898 | ASSERT(dst_locs[i] > -32); |
|---|
| 899 | // dst is in GPR, too. |
|---|
| 900 | |
|---|
| 901 | if(typeString[i] == 'l' || typeString[i] == 'L') |
|---|
| 902 | { |
|---|
| 903 | // mr dst+1, src+1 |
|---|
| 904 | *code++ = 0x7c000378 |
|---|
| 905 | | ((-dst_locs[i]+1) << 16) |
|---|
| 906 | | ((-src_locs[i]+1) << 11) |
|---|
| 907 | | ((-src_locs[i]+1) << 21); |
|---|
| 908 | } |
|---|
| 909 | // mr dst, src |
|---|
| 910 | *code++ = 0x7c000378 |
|---|
| 911 | | ((-dst_locs[i]) << 16) |
|---|
| 912 | | ((-src_locs[i]) << 11) |
|---|
| 913 | | ((-src_locs[i]) << 21); |
|---|
| 914 | } |
|---|
| 915 | else |
|---|
| 916 | { |
|---|
| 917 | if(typeString[i] == 'l' || typeString[i] == 'L') |
|---|
| 918 | { |
|---|
| 919 | // stw src+1, dst_offset+4(r1) |
|---|
| 920 | *code++ = 0x90010000 |
|---|
| 921 | | ((-src_locs[i]+1) << 21) |
|---|
| 922 | | (dst_locs[i] + 4); |
|---|
| 923 | } |
|---|
| 924 | |
|---|
| 925 | // stw src, dst_offset(r1) |
|---|
| 926 | *code++ = 0x90010000 |
|---|
| 927 | | ((-src_locs[i]) << 21) |
|---|
| 928 | | (dst_locs[i] + 8); |
|---|
| 929 | } |
|---|
| 930 | } |
|---|
| 931 | else |
|---|
| 932 | { |
|---|
| 933 | ASSERT(dst_locs[i] >= 0); |
|---|
| 934 | ASSERT(typeString[i] != 'f' && typeString[i] != 'd'); |
|---|
| 935 | |
|---|
| 936 | if(typeString[i] == 'l' || typeString[i] == 'L') |
|---|
| 937 | { |
|---|
| 938 | // lwz r0, src_offset(r1) |
|---|
| 939 | *code++ = 0x80010000 |
|---|
| 940 | | (src_locs[i] + frameSize + 8 + 4); |
|---|
| 941 | // stw r0, dst_offset(r1) |
|---|
| 942 | *code++ = 0x90010000 |
|---|
| 943 | | (dst_locs[i] + 8 + 4); |
|---|
| 944 | } |
|---|
| 945 | // lwz r0, src_offset(r1) |
|---|
| 946 | *code++ = 0x80010000 |
|---|
| 947 | | (src_locs[i] + frameSize + 8); |
|---|
| 948 | // stw r0, dst_offset(r1) |
|---|
| 949 | *code++ = 0x90010000 |
|---|
| 950 | | (dst_locs[i] + 8); |
|---|
| 951 | } |
|---|
| 952 | } |
|---|
| 953 | |
|---|
| 954 | // * hptr will be the new first argument. |
|---|
| 955 | // lis r3, hi(hptr) |
|---|
| 956 | *code++ = OP_HI(0x3c60, hptr); |
|---|
| 957 | // ori r3,r3,lo(hptr) |
|---|
| 958 | *code++ = OP_LO(0x6063, hptr); |
|---|
| 959 | |
|---|
| 960 | // * we need to return to a piece of code |
|---|
| 961 | // which will tear down the stack frame. |
|---|
| 962 | // lis r11,hi(obscure_ccall_ret_code) |
|---|
| 963 | *code++ = OP_HI(0x3d60, obscure_ccall_ret_code); |
|---|
| 964 | // ori r11,r11,lo(obscure_ccall_ret_code) |
|---|
| 965 | *code++ = OP_LO(0x616b, obscure_ccall_ret_code); |
|---|
| 966 | // mtlr r11 |
|---|
| 967 | *code++ = 0x7d6803a6; |
|---|
| 968 | |
|---|
| 969 | // * jump to wptr |
|---|
| 970 | // lis r11,hi(wptr) |
|---|
| 971 | *code++ = OP_HI(0x3d60, wptr); |
|---|
| 972 | // ori r11,r11,lo(wptr) |
|---|
| 973 | *code++ = OP_LO(0x616b, wptr); |
|---|
| 974 | // mtctr r11 |
|---|
| 975 | *code++ = 0x7d6903a6; |
|---|
| 976 | // bctr |
|---|
| 977 | *code++ = 0x4e800420; |
|---|
| 978 | |
|---|
| 979 | // Flush the Instruction cache: |
|---|
| 980 | { |
|---|
| 981 | unsigned *p = adjustor; |
|---|
| 982 | while(p < code) |
|---|
| 983 | { |
|---|
| 984 | __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0" |
|---|
| 985 | : : "r" (p)); |
|---|
| 986 | p++; |
|---|
| 987 | } |
|---|
| 988 | __asm__ volatile ("sync\n\tisync"); |
|---|
| 989 | } |
|---|
| 990 | } |
|---|
| 991 | |
|---|
| 992 | #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) |
|---|
| 993 | |
|---|
| 994 | #define OP_LO(op,lo) ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF)) |
|---|
| 995 | #define OP_HI(op,hi) ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16)) |
|---|
| 996 | { |
|---|
| 997 | /* The following code applies to all PowerPC and PowerPC64 platforms |
|---|
| 998 | whose stack layout is based on the AIX ABI. |
|---|
| 999 | |
|---|
| 1000 | Besides (obviously) AIX, this includes |
|---|
| 1001 | Mac OS 9 and BeOS/PPC (may they rest in peace), |
|---|
| 1002 | which use the 32-bit AIX ABI |
|---|
| 1003 | powerpc64-linux, |
|---|
| 1004 | which uses the 64-bit AIX ABI |
|---|
| 1005 | and Darwin (Mac OS X), |
|---|
| 1006 | which uses the same stack layout as AIX, |
|---|
| 1007 | but no function descriptors. |
|---|
| 1008 | |
|---|
| 1009 | The actual stack-frame shuffling is implemented out-of-line |
|---|
| 1010 | in the function adjustorCode, in AdjustorAsm.S. |
|---|
| 1011 | Here, we set up an AdjustorStub structure, which |
|---|
| 1012 | is a function descriptor (on platforms that have function |
|---|
| 1013 | descriptors) or a short piece of stub code (on Darwin) to call |
|---|
| 1014 | adjustorCode with a pointer to the AdjustorStub struct loaded |
|---|
| 1015 | into register r2. |
|---|
| 1016 | |
|---|
| 1017 | One nice thing about this is that there is _no_ code generated at |
|---|
| 1018 | runtime on the platforms that have function descriptors. |
|---|
| 1019 | */ |
|---|
| 1020 | AdjustorStub *adjustorStub; |
|---|
| 1021 | int sz = 0, extra_sz, total_sz; |
|---|
| 1022 | |
|---|
| 1023 | #ifdef FUNDESCS |
|---|
| 1024 | adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor"); |
|---|
| 1025 | #else |
|---|
| 1026 | adjustorStub = allocateExec(sizeof(AdjustorStub),&code); |
|---|
| 1027 | #endif |
|---|
| 1028 | adjustor = adjustorStub; |
|---|
| 1029 | |
|---|
| 1030 | adjustorStub->code = (void*) &adjustorCode; |
|---|
| 1031 | |
|---|
| 1032 | #ifdef FUNDESCS |
|---|
| 1033 | // function descriptors are a cool idea. |
|---|
| 1034 | // We don't need to generate any code at runtime. |
|---|
| 1035 | adjustorStub->toc = adjustorStub; |
|---|
| 1036 | #else |
|---|
| 1037 | |
|---|
| 1038 | // no function descriptors :-( |
|---|
| 1039 | // We need to do things "by hand". |
|---|
| 1040 | #if defined(powerpc_HOST_ARCH) |
|---|
| 1041 | // lis r2, hi(adjustorStub) |
|---|
| 1042 | adjustorStub->lis = OP_HI(0x3c40, adjustorStub); |
|---|
| 1043 | // ori r2, r2, lo(adjustorStub) |
|---|
| 1044 | adjustorStub->ori = OP_LO(0x6042, adjustorStub); |
|---|
| 1045 | // lwz r0, code(r2) |
|---|
| 1046 | adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code) |
|---|
| 1047 | - (char*)adjustorStub); |
|---|
| 1048 | // mtctr r0 |
|---|
| 1049 | adjustorStub->mtctr = 0x7c0903a6; |
|---|
| 1050 | // bctr |
|---|
| 1051 | adjustorStub->bctr = 0x4e800420; |
|---|
| 1052 | #else |
|---|
| 1053 | barf("adjustor creation not supported on this platform"); |
|---|
| 1054 | #endif |
|---|
| 1055 | |
|---|
| 1056 | // Flush the Instruction cache: |
|---|
| 1057 | { |
|---|
| 1058 | int n = sizeof(AdjustorStub)/sizeof(unsigned); |
|---|
| 1059 | unsigned *p = (unsigned*)adjustor; |
|---|
| 1060 | while(n--) |
|---|
| 1061 | { |
|---|
| 1062 | __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0" |
|---|
| 1063 | : : "r" (p)); |
|---|
| 1064 | p++; |
|---|
| 1065 | } |
|---|
| 1066 | __asm__ volatile ("sync\n\tisync"); |
|---|
| 1067 | } |
|---|
| 1068 | #endif |
|---|
| 1069 | |
|---|
| 1070 | // Calculate the size of the stack frame, in words. |
|---|
| 1071 | sz = totalArgumentSize(typeString); |
|---|
| 1072 | |
|---|
| 1073 | // The first eight words of the parameter area |
|---|
| 1074 | // are just "backing store" for the parameters passed in |
|---|
| 1075 | // the GPRs. extra_sz is the number of words beyond those first |
|---|
| 1076 | // 8 words. |
|---|
| 1077 | extra_sz = sz - 8; |
|---|
| 1078 | if(extra_sz < 0) |
|---|
| 1079 | extra_sz = 0; |
|---|
| 1080 | |
|---|
| 1081 | // Calculate the total size of the stack frame. |
|---|
| 1082 | total_sz = (6 /* linkage area */ |
|---|
| 1083 | + 8 /* minimum parameter area */ |
|---|
| 1084 | + 2 /* two extra arguments */ |
|---|
| 1085 | + extra_sz)*sizeof(StgWord); |
|---|
| 1086 | |
|---|
| 1087 | // align to 16 bytes. |
|---|
| 1088 | // AIX only requires 8 bytes, but who cares? |
|---|
| 1089 | total_sz = (total_sz+15) & ~0xF; |
|---|
| 1090 | |
|---|
| 1091 | // Fill in the information that adjustorCode in AdjustorAsm.S |
|---|
| 1092 | // will use to create a new stack frame with the additional args. |
|---|
| 1093 | adjustorStub->hptr = hptr; |
|---|
| 1094 | adjustorStub->wptr = wptr; |
|---|
| 1095 | adjustorStub->negative_framesize = -total_sz; |
|---|
| 1096 | adjustorStub->extrawords_plus_one = extra_sz + 1; |
|---|
| 1097 | } |
|---|
| 1098 | |
|---|
| 1099 | #elif defined(ia64_HOST_ARCH) |
|---|
| 1100 | /* |
|---|
| 1101 | Up to 8 inputs are passed in registers. We flush the last two inputs to |
|---|
| 1102 | the stack, initially into the 16-byte scratch region left by the caller. |
|---|
| 1103 | We then shuffle the others along by 4 (taking 2 registers for ourselves |
|---|
| 1104 | to save return address and previous function state - we need to come back |
|---|
| 1105 | here on the way out to restore the stack, so this is a real function |
|---|
| 1106 | rather than just a trampoline). |
|---|
| 1107 | |
|---|
| 1108 | The function descriptor we create contains the gp of the target function |
|---|
| 1109 | so gp is already loaded correctly. |
|---|
| 1110 | |
|---|
| 1111 | [MLX] alloc r16=ar.pfs,10,2,0 |
|---|
| 1112 | movl r17=wptr |
|---|
| 1113 | [MII] st8.spill [r12]=r38,8 // spill in6 (out4) |
|---|
| 1114 | mov r41=r37 // out7 = in5 (out3) |
|---|
| 1115 | mov r40=r36;; // out6 = in4 (out2) |
|---|
| 1116 | [MII] st8.spill [r12]=r39 // spill in7 (out5) |
|---|
| 1117 | mov.sptk b6=r17,50 |
|---|
| 1118 | mov r38=r34;; // out4 = in2 (out0) |
|---|
| 1119 | [MII] mov r39=r35 // out5 = in3 (out1) |
|---|
| 1120 | mov r37=r33 // out3 = in1 (loc1) |
|---|
| 1121 | mov r36=r32 // out2 = in0 (loc0) |
|---|
| 1122 | [MLX] adds r12=-24,r12 // update sp |
|---|
| 1123 | movl r34=hptr;; // out0 = hptr |
|---|
| 1124 | [MIB] mov r33=r16 // loc1 = ar.pfs |
|---|
| 1125 | mov r32=b0 // loc0 = retaddr |
|---|
| 1126 | br.call.sptk.many b0=b6;; |
|---|
| 1127 | |
|---|
| 1128 | [MII] adds r12=-16,r12 |
|---|
| 1129 | mov b0=r32 |
|---|
| 1130 | mov.i ar.pfs=r33 |
|---|
| 1131 | [MFB] nop.m 0x0 |
|---|
| 1132 | nop.f 0x0 |
|---|
| 1133 | br.ret.sptk.many b0;; |
|---|
| 1134 | */ |
|---|
| 1135 | |
|---|
| 1136 | /* These macros distribute a long constant into the two words of an MLX bundle */ |
|---|
| 1137 | #define BITS(val,start,count) (((val) >> (start)) & ((1 << (count))-1)) |
|---|
| 1138 | #define MOVL_LOWORD(val) (BITS(val,22,18) << 46) |
|---|
| 1139 | #define MOVL_HIWORD(val) ( (BITS(val,0,7) << 36) \ |
|---|
| 1140 | | (BITS(val,7,9) << 50) \ |
|---|
| 1141 | | (BITS(val,16,5) << 45) \ |
|---|
| 1142 | | (BITS(val,21,1) << 44) \ |
|---|
| 1143 | | (BITS(val,40,23)) \ |
|---|
| 1144 | | (BITS(val,63,1) << 59)) |
|---|
| 1145 | |
|---|
| 1146 | { |
|---|
| 1147 | StgStablePtr stable; |
|---|
| 1148 | IA64FunDesc *wdesc = (IA64FunDesc *)wptr; |
|---|
| 1149 | StgWord64 wcode = wdesc->ip; |
|---|
| 1150 | IA64FunDesc *fdesc; |
|---|
| 1151 | StgWord64 *code; |
|---|
| 1152 | |
|---|
| 1153 | /* we allocate on the Haskell heap since malloc'd memory isn't |
|---|
| 1154 | * executable - argh */ |
|---|
| 1155 | /* Allocated memory is word-aligned (8 bytes) but functions on ia64 |
|---|
| 1156 | * must be aligned to 16 bytes. We allocate an extra 8 bytes of |
|---|
| 1157 | * wiggle room so that we can put the code on a 16 byte boundary. */ |
|---|
| 1158 | adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8+8, &stable); |
|---|
| 1159 | |
|---|
| 1160 | fdesc = (IA64FunDesc *)adjustor; |
|---|
| 1161 | code = (StgWord64 *)(fdesc + 1); |
|---|
| 1162 | /* add 8 bytes to code if needed to align to a 16-byte boundary */ |
|---|
| 1163 | if ((StgWord64)code & 15) code++; |
|---|
| 1164 | fdesc->ip = (StgWord64)code; |
|---|
| 1165 | fdesc->gp = wdesc->gp; |
|---|
| 1166 | |
|---|
| 1167 | code[0] = 0x0000058004288004 | MOVL_LOWORD(wcode); |
|---|
| 1168 | code[1] = 0x6000000220000000 | MOVL_HIWORD(wcode); |
|---|
| 1169 | code[2] = 0x029015d818984001; |
|---|
| 1170 | code[3] = 0x8401200500420094; |
|---|
| 1171 | code[4] = 0x886011d8189c0001; |
|---|
| 1172 | code[5] = 0x84011004c00380c0; |
|---|
| 1173 | code[6] = 0x0250210046013800; |
|---|
| 1174 | code[7] = 0x8401000480420084; |
|---|
| 1175 | code[8] = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr); |
|---|
| 1176 | code[9] = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr); |
|---|
| 1177 | code[10] = 0x0200210020010811; |
|---|
| 1178 | code[11] = 0x1080006800006200; |
|---|
| 1179 | code[12] = 0x0000210018406000; |
|---|
| 1180 | code[13] = 0x00aa021000038005; |
|---|
| 1181 | code[14] = 0x000000010000001d; |
|---|
| 1182 | code[15] = 0x0084000880000200; |
|---|
| 1183 | |
|---|
| 1184 | /* save stable pointers in convenient form */ |
|---|
| 1185 | code[16] = (StgWord64)hptr; |
|---|
| 1186 | code[17] = (StgWord64)stable; |
|---|
| 1187 | } |
|---|
| 1188 | #else |
|---|
| 1189 | barf("adjustor creation not supported on this platform"); |
|---|
| 1190 | #endif |
|---|
| 1191 | break; |
|---|
| 1192 | |
|---|
| 1193 | default: |
|---|
| 1194 | ASSERT(0); |
|---|
| 1195 | break; |
|---|
| 1196 | } |
|---|
| 1197 | |
|---|
| 1198 | /* Have fun! */ |
|---|
| 1199 | return code; |
|---|
| 1200 | } |
|---|
| 1201 | |
|---|
| 1202 | |
|---|
| 1203 | void |
|---|
| 1204 | freeHaskellFunctionPtr(void* ptr) |
|---|
| 1205 | { |
|---|
| 1206 | #if defined(i386_HOST_ARCH) |
|---|
| 1207 | if ( *(unsigned char*)ptr != 0xe8 && |
|---|
| 1208 | *(unsigned char*)ptr != 0x58 ) { |
|---|
| 1209 | errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); |
|---|
| 1210 | return; |
|---|
| 1211 | } |
|---|
| 1212 | if (*(unsigned char*)ptr == 0xe8) { /* Aha, a ccall adjustor! */ |
|---|
| 1213 | freeStablePtr(((AdjustorStub*)ptr)->hptr); |
|---|
| 1214 | } else { |
|---|
| 1215 | freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02))); |
|---|
| 1216 | } |
|---|
| 1217 | #elif defined(x86_64_HOST_ARCH) |
|---|
| 1218 | if ( *(StgWord16 *)ptr == 0x894d ) { |
|---|
| 1219 | freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+ |
|---|
| 1220 | #if defined(mingw32_HOST_OS) |
|---|
| 1221 | 0x28 |
|---|
| 1222 | #else |
|---|
| 1223 | 0x20 |
|---|
| 1224 | #endif |
|---|
| 1225 | )); |
|---|
| 1226 | #if !defined(mingw32_HOST_OS) |
|---|
| 1227 | } else if ( *(StgWord16 *)ptr == 0x5141 ) { |
|---|
| 1228 | freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30)); |
|---|
| 1229 | #endif |
|---|
| 1230 | #if defined(mingw32_HOST_OS) |
|---|
| 1231 | } else if ( *(StgWord16 *)ptr == 0x8348 ) { |
|---|
| 1232 | freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48)); |
|---|
| 1233 | #endif |
|---|
| 1234 | } else { |
|---|
| 1235 | errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); |
|---|
| 1236 | return; |
|---|
| 1237 | } |
|---|
| 1238 | #elif defined(sparc_HOST_ARCH) |
|---|
| 1239 | if ( *(unsigned long*)ptr != 0x9C23A008UL ) { |
|---|
| 1240 | errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); |
|---|
| 1241 | return; |
|---|
| 1242 | } |
|---|
| 1243 | |
|---|
| 1244 | /* Free the stable pointer first..*/ |
|---|
| 1245 | freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11))); |
|---|
| 1246 | #elif defined(alpha_HOST_ARCH) |
|---|
| 1247 | if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) { |
|---|
| 1248 | errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); |
|---|
| 1249 | return; |
|---|
| 1250 | } |
|---|
| 1251 | |
|---|
| 1252 | /* Free the stable pointer first..*/ |
|---|
| 1253 | freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10))); |
|---|
| 1254 | #elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS) |
|---|
| 1255 | if ( *(StgWord*)ptr != 0x48000008 ) { |
|---|
| 1256 | errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); |
|---|
| 1257 | return; |
|---|
| 1258 | } |
|---|
| 1259 | freeStablePtr(((StgStablePtr*)ptr)[1]); |
|---|
| 1260 | #elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH) |
|---|
| 1261 | if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) { |
|---|
| 1262 | errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); |
|---|
| 1263 | return; |
|---|
| 1264 | } |
|---|
| 1265 | freeStablePtr(((AdjustorStub*)ptr)->hptr); |
|---|
| 1266 | #elif defined(ia64_HOST_ARCH) |
|---|
| 1267 | IA64FunDesc *fdesc = (IA64FunDesc *)ptr; |
|---|
| 1268 | StgWord64 *code = (StgWord64 *)(fdesc+1); |
|---|
| 1269 | |
|---|
| 1270 | if (fdesc->ip != (StgWord64)code) { |
|---|
| 1271 | errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr); |
|---|
| 1272 | return; |
|---|
| 1273 | } |
|---|
| 1274 | freeStablePtr((StgStablePtr)code[16]); |
|---|
| 1275 | freeStablePtr((StgStablePtr)code[17]); |
|---|
| 1276 | return; |
|---|
| 1277 | #else |
|---|
| 1278 | ASSERT(0); |
|---|
| 1279 | #endif |
|---|
| 1280 | // Can't write to this memory, it is only executable: |
|---|
| 1281 | // *((unsigned char*)ptr) = '\0'; |
|---|
| 1282 | |
|---|
| 1283 | freeExec(ptr); |
|---|
| 1284 | } |
|---|
| 1285 | |
|---|
| 1286 | #endif // !USE_LIBFFI_FOR_ADJUSTORS |
|---|