root/rts/Adjustor.c

Revision c2870706b29c24ac86ae2a9e2359dd1e4af71ac8, 45.8 KB (checked in by Ian Lynagh <igloo@…>, 10 days ago)

More Win64 adjustor fixes

  • Property mode set to 100644
Line 
1/* -----------------------------------------------------------------------------
2 * Foreign export adjustor thunks
3 *
4 * Copyright (c) 1998.
5 *
6 * ---------------------------------------------------------------------------*/
7
8/* A little bit of background...
9   
10An adjustor thunk is a dynamically allocated code snippet that allows
11Haskell closures to be viewed as C function pointers.
12
13Stable pointers provide a way for the outside world to get access to,
14and evaluate, Haskell heap objects, with the RTS providing a small
15range of ops for doing so. So, assuming we've got a stable pointer in
16our hand in C, we can jump into the Haskell world and evaluate a callback
17procedure, say. This works OK in some cases where callbacks are used, but
18does require the external code to know about stable pointers and how to deal
19with them. We'd like to hide the Haskell-nature of a callback and have it
20be invoked just like any other C function pointer.
21
22Enter adjustor thunks. An adjustor thunk is a little piece of code
23that's generated on-the-fly (one per Haskell closure being exported)
24that, when entered using some 'universal' calling convention (e.g., the
25C calling convention on platform X), pushes an implicit stable pointer
26(to the Haskell callback) before calling another (static) C function stub
27which takes care of entering the Haskell code via its stable pointer.
28
29An adjustor thunk is allocated on the C heap, and is called from within
30Haskell just before handing out the function pointer to the Haskell (IO)
31action. User code should never have to invoke it explicitly.
32
33An adjustor thunk differs from a C function pointer in one respect: when
34the code is through with it, it has to be freed in order to release Haskell
35and C resources. Failure to do so will result in memory leaks on both the C and
36Haskell side.
37*/
38
39#include "PosixSource.h"
40#include "Rts.h"
41
42#include "RtsUtils.h"
43#include "Stable.h"
44
45#if defined(USE_LIBFFI_FOR_ADJUSTORS)
46#include "ffi.h"
47#include <string.h>
48#endif
49
50#if defined(i386_HOST_ARCH)
51extern void adjustorCode(void);
52#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
53// from AdjustorAsm.s
54// not declared as a function so that AIX-style
55// fundescs can never get in the way.
56extern void *adjustorCode;
57#endif
58
59#if defined(USE_LIBFFI_FOR_ADJUSTORS)
60void
61freeHaskellFunctionPtr(void* ptr)
62{
63    ffi_closure *cl;
64
65    cl = (ffi_closure*)ptr;
66    freeStablePtr(cl->user_data);
67    stgFree(cl->cif->arg_types);
68    stgFree(cl->cif);
69    freeExec(cl);
70}
71
72static ffi_type * char_to_ffi_type(char c)
73{
74    switch (c) {
75    case 'v'return &ffi_type_void;
76    case 'f'return &ffi_type_float;
77    case 'd'return &ffi_type_double;
78    case 'L'return &ffi_type_sint64;
79    case 'l'return &ffi_type_uint64;
80    case 'W'return &ffi_type_sint32;
81    case 'w'return &ffi_type_uint32;
82    case 'S'return &ffi_type_sint16;
83    case 's'return &ffi_type_uint16;
84    case 'B'return &ffi_type_sint8;
85    case 'b'return &ffi_type_uint8;
86    case 'p'return &ffi_type_pointer;
87    default:   barf("char_to_ffi_type: unknown type '%c'", c);
88    }
89}
90
91void*
92createAdjustor (int cconv, 
93                StgStablePtr hptr,
94                StgFunPtr wptr,
95                char *typeString)
96{
97    ffi_cif *cif;
98    ffi_type **arg_types;
99    nat n_args, i;
100    ffi_type *result_type;
101    ffi_closure *cl;
102    int r, abi;
103    void *code;
104
105    n_args = strlen(typeString) - 1;
106    cif = stgMallocBytes(sizeof(ffi_cif), "createAdjustor");
107    arg_types = stgMallocBytes(n_args * sizeof(ffi_type*), "createAdjustor");
108
109    result_type = char_to_ffi_type(typeString[0]);
110    for (i=0; i < n_args; i++) {
111        arg_types[i] = char_to_ffi_type(typeString[i+1]);
112    }
113    switch (cconv) {
114#if defined(mingw32_HOST_OS) && defined(i386_HOST_ARCH)
115    case 0: /* stdcall */
116        abi = FFI_STDCALL;
117        break;
118#endif
119    case 1: /* ccall */
120        abi = FFI_DEFAULT_ABI;
121        break;
122    default:
123        barf("createAdjustor: convention %d not supported on this platform", cconv);
124    }
125
126    r = ffi_prep_cif(cif, abi, n_args, result_type, arg_types);
127    if (r != FFI_OK) barf("ffi_prep_cif failed: %d", r);
128   
129    cl = allocateExec(sizeof(ffi_closure), &code);
130    if (cl == NULL) {
131        barf("createAdjustor: failed to allocate memory");
132    }
133
134    r = ffi_prep_closure(cl, cif, (void*)wptr, hptr/*userdata*/);
135    if (r != FFI_OK) barf("ffi_prep_closure failed: %d", r);
136
137    return (void*)code;
138}
139
140#else // To end of file...
141
142#if defined(_WIN32)
143#include <windows.h>
144#endif
145
146#if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
147#include <string.h>
148#endif
149
150#ifdef LEADING_UNDERSCORE
151#define UNDERSCORE "_"
152#else
153#define UNDERSCORE ""
154#endif
155
156#if defined(x86_64_HOST_ARCH)
157/*
158  Now here's something obscure for you:
159
160  When generating an adjustor thunk that uses the C calling
161  convention, we have to make sure that the thunk kicks off
162  the process of jumping into Haskell with a tail jump. Why?
163  Because as a result of jumping in into Haskell we may end
164  up freeing the very adjustor thunk we came from using
165  freeHaskellFunctionPtr(). Hence, we better not return to
166  the adjustor code on our way  out, since it could by then
167  point to junk.
168 
169  The fix is readily at hand, just include the opcodes
170  for the C stack fixup code that we need to perform when
171  returning in some static piece of memory and arrange
172  to return to it before tail jumping from the adjustor thunk.
173*/
174static void GNUC3_ATTRIBUTE(used) obscure_ccall_wrapper(void)
175{
176  __asm__ (
177   ".globl " UNDERSCORE "obscure_ccall_ret_code\n"
178   UNDERSCORE "obscure_ccall_ret_code:\n\t"
179   "addq $0x8, %rsp\n\t"
180#if defined(mingw32_HOST_OS)
181   /* On Win64, we had to put the original return address after the
182      arg 1-4 spill slots, ro now we have to move it back */
183   "movq 0x20(%rsp), %rcx\n"
184   "movq %rcx, (%rsp)\n"
185#endif
186   "ret"
187  );
188}
189extern void obscure_ccall_ret_code(void);
190#endif
191
192#if defined(alpha_HOST_ARCH)
193/* To get the definition of PAL_imb: */
194# if defined(linux_HOST_OS)
195#  include <asm/pal.h>
196# else
197#  include <machine/pal.h>
198# endif
199#endif
200
201#if defined(ia64_HOST_ARCH)
202
203/* Layout of a function descriptor */
204typedef struct _IA64FunDesc {
205    StgWord64 ip;
206    StgWord64 gp;
207} IA64FunDesc;
208
209static void *
210stgAllocStable(size_t size_in_bytes, StgStablePtr *stable)
211{
212  StgArrWords* arr;
213  nat data_size_in_words, total_size_in_words;
214 
215  /* round up to a whole number of words */
216  data_size_in_words  = ROUNDUP_BYTES_TO_WDS(size_in_bytes);
217  total_size_in_words = sizeofW(StgArrWords) + data_size_in_words;
218 
219  /* allocate and fill it in */
220  arr = (StgArrWords *)allocate(total_size_in_words);
221  SET_ARR_HDR(arr, &stg_ARR_WORDS_info, CCCS, size_in_bytes);
222 
223  /* obtain a stable ptr */
224  *stable = getStablePtr((StgPtr)arr);
225
226  /* and return a ptr to the goods inside the array */
227  return(&(arr->payload));
228}
229#endif
230
231#if defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
232__asm__("obscure_ccall_ret_code:\n\t"
233        "lwz 1,0(1)\n\t"
234        "lwz 0,4(1)\n\t"
235        "mtlr 0\n\t"
236        "blr");
237extern void obscure_ccall_ret_code(void);
238#endif
239
240#if defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
241#if !(defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS))
242
243/* !!! !!! WARNING: !!! !!!
244 * This structure is accessed from AdjustorAsm.s
245 * Any changes here have to be mirrored in the offsets there.
246 */
247
248typedef struct AdjustorStub {
249#if defined(powerpc_HOST_ARCH) && defined(darwin_HOST_OS)
250    unsigned        lis;
251    unsigned        ori;
252    unsigned        lwz;
253    unsigned        mtctr;
254    unsigned        bctr;
255    StgFunPtr       code;
256#elif defined(powerpc64_HOST_ARCH) && defined(darwin_HOST_OS)
257        /* powerpc64-darwin: just guessing that it won't use fundescs. */
258    unsigned        lis;
259    unsigned        ori;
260    unsigned        rldimi;
261    unsigned        oris;
262    unsigned        ori2;
263    unsigned        lwz;
264    unsigned        mtctr;
265    unsigned        bctr;
266    StgFunPtr       code;
267#else
268        /* fundesc-based ABIs */
269#define         FUNDESCS
270    StgFunPtr       code;
271    struct AdjustorStub
272                    *toc;
273    void            *env;
274#endif
275    StgStablePtr    hptr;
276    StgFunPtr       wptr;
277    StgInt          negative_framesize;
278    StgInt          extrawords_plus_one;
279} AdjustorStub;
280
281#endif
282#endif
283
284#if defined(i386_HOST_ARCH)
285
286/* !!! !!! WARNING: !!! !!!
287 * This structure is accessed from AdjustorAsm.s
288 * Any changes here have to be mirrored in the offsets there.
289 */
290
291typedef struct AdjustorStub {
292    unsigned char   call[8];
293    StgStablePtr    hptr;
294    StgFunPtr       wptr;
295    StgInt          frame_size;
296    StgInt          argument_size;
297} AdjustorStub;
298#endif
299
300#if defined(i386_HOST_ARCH) || defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
301static int totalArgumentSize(char *typeString)
302{
303    int sz = 0;
304    while(*typeString)
305    {
306        char t = *typeString++;
307
308        switch(t)
309        {
310                // on 32-bit platforms, Double and Int64 occupy two words.
311            case 'd':
312            case 'l':
313            case 'L':
314                if(sizeof(void*) == 4)
315                {
316                    sz += 2;
317                    break;
318                }
319                // everything else is one word.
320            default:
321                sz += 1;
322        }
323    }
324    return sz;
325}
326#endif
327
328void*
329createAdjustor(int cconv, StgStablePtr hptr,
330               StgFunPtr wptr,
331               char *typeString
332#if !defined(powerpc_HOST_ARCH) && !defined(powerpc64_HOST_ARCH) && !defined(x86_64_HOST_ARCH)
333                  STG_UNUSED
334#endif
335              )
336{
337  void *adjustor = NULL;
338  void *code;
339
340  switch (cconv)
341  {
342  case 0: /* _stdcall */
343#if defined(i386_HOST_ARCH) && !defined(darwin_HOST_OS)
344    /* Magic constant computed by inspecting the code length of
345       the following assembly language snippet
346       (offset and machine code prefixed):
347
348     <0>:       58                popl   %eax              # temp. remove ret addr..
349     <1>:       68 fd fc fe fa    pushl  0xfafefcfd        # constant is large enough to
350                                                           # hold a StgStablePtr
351     <6>:       50                pushl  %eax              # put back ret. addr
352     <7>:       b8 fa ef ff 00    movl   $0x00ffeffa, %eax # load up wptr
353     <c>:       ff e0             jmp    %eax              # and jump to it.
354                # the callee cleans up the stack
355    */
356    adjustor = allocateExec(14,&code);
357    {
358        unsigned char *const adj_code = (unsigned char *)adjustor;
359        adj_code[0x00] = (unsigned char)0x58;  /* popl %eax  */
360
361        adj_code[0x01] = (unsigned char)0x68;  /* pushl hptr (which is a dword immediate ) */
362        *((StgStablePtr*)(adj_code + 0x02)) = (StgStablePtr)hptr;
363
364        adj_code[0x06] = (unsigned char)0x50; /* pushl %eax */
365
366        adj_code[0x07] = (unsigned char)0xb8; /* movl  $wptr, %eax */
367        *((StgFunPtr*)(adj_code + 0x08)) = (StgFunPtr)wptr;
368
369        adj_code[0x0c] = (unsigned char)0xff; /* jmp %eax */
370        adj_code[0x0d] = (unsigned char)0xe0;
371    }
372#endif
373    break;
374
375  case 1: /* _ccall */
376#if defined(i386_HOST_ARCH)
377    {
378        /*
379          Most of the trickiness here is due to the need to keep the
380          stack pointer 16-byte aligned (see #5250).  That means we
381          can't just push another argument on the stack and call the
382          wrapper, we may have to shuffle the whole argument block.
383
384          We offload most of the work to AdjustorAsm.S.
385        */
386        AdjustorStub *adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
387        adjustor = adjustorStub;
388
389        int sz = totalArgumentSize(typeString);
390       
391        adjustorStub->call[0] = 0xe8;
392        *(long*)&adjustorStub->call[1] = ((char*)&adjustorCode) - ((char*)adjustorStub + 5);
393        adjustorStub->hptr = hptr;
394        adjustorStub->wptr = wptr;
395       
396            // The adjustor puts the following things on the stack:
397            // 1.) %ebp link
398            // 2.) padding and (a copy of) the arguments
399            // 3.) a dummy argument
400            // 4.) hptr
401            // 5.) return address (for returning to the adjustor)
402            // All these have to add up to a multiple of 16.
403
404            // first, include everything in frame_size
405        adjustorStub->frame_size = sz * 4 + 16;
406            // align to 16 bytes
407        adjustorStub->frame_size = (adjustorStub->frame_size + 15) & ~15;
408            // only count 2.) and 3.) as part of frame_size
409        adjustorStub->frame_size -= 12; 
410        adjustorStub->argument_size = sz;
411    }
412   
413#elif defined(x86_64_HOST_ARCH)
414
415# if defined(mingw32_HOST_OS)
416    /*
417      stack at call:
418               argn
419               ...
420               arg5
421               return address
422               %rcx,%rdx,%r8,%r9 = arg1..arg4
423
424      if there are <4 integer args, then we can just push the
425      StablePtr into %rcx and shuffle the other args up.
426
427      If there are >=4 integer args, then we have to flush one arg
428      to the stack, and arrange to adjust the stack ptr on return.
429      The stack will be rearranged to this:
430
431             argn
432             ...
433             arg5
434             return address  *** <-- dummy arg in stub fn.
435             arg4
436             obscure_ccall_ret_code
437
438      This unfortunately means that the type of the stub function
439      must have a dummy argument for the original return address
440      pointer inserted just after the 4th integer argument.
441
442      Code for the simple case:
443
444   0:   4d 89 c1                mov    %r8,%r9
445   3:   49 89 d0                mov    %rdx,%r8
446   6:   48 89 ca                mov    %rcx,%rdx
447   9:   f2 0f 10 da             movsd  %xmm2,%xmm3
448   d:   f2 0f 10 d1             movsd  %xmm1,%xmm2
449  11:   f2 0f 10 c8             movsd  %xmm0,%xmm1
450  15:   48 8b 0d 0c 00 00 00    mov    0xc(%rip),%rcx    # 28 <.text+0x28>
451  1c:   ff 25 0e 00 00 00       jmpq   *0xe(%rip)        # 30 <.text+0x30>
452  22:   90                      nop
453  [...]
454
455
456  And the version for >=4 integer arguments:
457
458[we want to push the 4th argument (either %r9 or %xmm3, depending on
459 whether it is a floating arg or not) and the return address onto the
460 stack. However, slots 1-4 are reserved for code we call to spill its
461 args 1-4 into, so we can't just push them onto the bottom of the stack.
462 So first put the 4th argument onto the stack, above what will be the
463 spill slots.]
464   0:   48 83 ec 08             sub    $0x8,%rsp
465[if non-floating arg, then do this:]
466   4:   90                      nop
467   5:   4c 89 4c 24 20          mov    %r9,0x20(%rsp)
468[else if floating arg then do this:]
469   4:   f2 0f 11 5c 24 20       movsd  %xmm3,0x20(%rsp)
470[end if]
471[Now push the new return address onto the stack]
472   a:   ff 35 30 00 00 00       pushq  0x30(%rip)        # 40 <.text+0x40>
473[But the old return address has been moved up into a spill slot, so
474 we need to move it above them]
475  10:   4c 8b 4c 24 10          mov    0x10(%rsp),%r9
476  15:   4c 89 4c 24 30          mov    %r9,0x30(%rsp)
477[Now we do the normal register shuffle-up etc]
478  1a:   4d 89 c1                mov    %r8,%r9
479  1d:   49 89 d0                mov    %rdx,%r8
480  20:   48 89 ca                mov    %rcx,%rdx
481  23:   f2 0f 10 da             movsd  %xmm2,%xmm3
482  27:   f2 0f 10 d1             movsd  %xmm1,%xmm2
483  2b:   f2 0f 10 c8             movsd  %xmm0,%xmm1
484  2f:   48 8b 0d 12 00 00 00    mov    0x12(%rip),%rcx        # 48 <.text+0x48>
485  36:   ff 25 14 00 00 00       jmpq   *0x14(%rip)        # 50 <.text+0x50>
486  3c:   90                      nop
487  3d:   90                      nop
488  3e:   90                      nop
489  3f:   90                      nop
490  [...]
491
492    */
493    { 
494        int i = 0;
495        int fourthFloating;
496        char *c;
497        StgWord8 *adj_code;
498
499        // determine whether we have 4 or more integer arguments,
500        // and therefore need to flush one to the stack.
501        for (c = typeString; *c != '\0'; c++) {
502            i++;
503            if (i == 4) {
504                fourthFloating = (*c == 'f' || *c == 'd');
505                break;
506            }
507        }
508
509        if (i < 4) {
510            adjustor = allocateExec(0x38,&code);
511            adj_code = (StgWord8*)adjustor;
512
513            *(StgInt32 *)adj_code        = 0x49c1894d;
514            *(StgInt32 *)(adj_code+0x4)  = 0x8948d089;
515            *(StgInt32 *)(adj_code+0x8)  = 0x100ff2ca;
516            *(StgInt32 *)(adj_code+0xc)  = 0x100ff2da;
517            *(StgInt32 *)(adj_code+0x10) = 0x100ff2d1;
518            *(StgInt32 *)(adj_code+0x14) = 0x0d8b48c8;
519            *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
520
521            *(StgInt32 *)(adj_code+0x1c) = 0x000e25ff;
522            *(StgInt32 *)(adj_code+0x20) = 0x00000000;
523            *(StgInt64 *)(adj_code+0x28) = (StgInt64)hptr;
524            *(StgInt64 *)(adj_code+0x30) = (StgInt64)wptr;
525        }
526        else
527        {
528            adjustor = allocateExec(0x58,&code);
529            adj_code = (StgWord8*)adjustor;
530            *(StgInt32 *)adj_code        = 0x08ec8348;
531            *(StgInt32 *)(adj_code+0x4)  = fourthFloating ? 0x5c110ff2
532                                                          : 0x4c894c90;
533            *(StgInt32 *)(adj_code+0x8)  = 0x35ff2024;
534            *(StgInt32 *)(adj_code+0xc)  = 0x00000030;
535            *(StgInt32 *)(adj_code+0x10) = 0x244c8b4c;
536            *(StgInt32 *)(adj_code+0x14) = 0x4c894c10;
537            *(StgInt32 *)(adj_code+0x18) = 0x894d3024;
538            *(StgInt32 *)(adj_code+0x1c) = 0xd08949c1;
539            *(StgInt32 *)(adj_code+0x20) = 0xf2ca8948;
540            *(StgInt32 *)(adj_code+0x24) = 0xf2da100f;
541            *(StgInt32 *)(adj_code+0x28) = 0xf2d1100f;
542            *(StgInt32 *)(adj_code+0x2c) = 0x48c8100f;
543            *(StgInt32 *)(adj_code+0x30) = 0x00120d8b;
544            *(StgInt32 *)(adj_code+0x34) = 0x25ff0000;
545            *(StgInt32 *)(adj_code+0x38) = 0x00000014;
546            *(StgInt32 *)(adj_code+0x3c) = 0x90909090;
547            *(StgInt64 *)(adj_code+0x40) = (StgInt64)obscure_ccall_ret_code;
548            *(StgInt64 *)(adj_code+0x48) = (StgInt64)hptr;
549            *(StgInt64 *)(adj_code+0x50) = (StgInt64)wptr;
550        }
551    }
552
553# else
554    /*
555      stack at call:
556               argn
557               ...
558               arg7
559               return address
560               %rdi,%rsi,%rdx,%rcx,%r8,%r9 = arg1..arg6
561
562      if there are <6 integer args, then we can just push the
563      StablePtr into %edi and shuffle the other args up.
564
565      If there are >=6 integer args, then we have to flush one arg
566      to the stack, and arrange to adjust the stack ptr on return.
567      The stack will be rearranged to this:
568
569             argn
570             ...
571             arg7
572             return address  *** <-- dummy arg in stub fn.
573             arg6
574             obscure_ccall_ret_code
575
576      This unfortunately means that the type of the stub function
577      must have a dummy argument for the original return address
578      pointer inserted just after the 6th integer argument.
579
580      Code for the simple case:
581
582   0:   4d 89 c1                mov    %r8,%r9
583   3:   49 89 c8                mov    %rcx,%r8
584   6:   48 89 d1                mov    %rdx,%rcx
585   9:   48 89 f2                mov    %rsi,%rdx
586   c:   48 89 fe                mov    %rdi,%rsi
587   f:   48 8b 3d 0a 00 00 00    mov    10(%rip),%rdi
588  16:   ff 25 0c 00 00 00       jmpq   *12(%rip)
589  ...
590  20: .quad 0  # aligned on 8-byte boundary
591  28: .quad 0  # aligned on 8-byte boundary
592
593
594  And the version for >=6 integer arguments:
595
596   0:   41 51                   push   %r9
597   2:   ff 35 20 00 00 00       pushq  32(%rip)        # 28 <ccall_adjustor+0x28>
598   8:   4d 89 c1                mov    %r8,%r9
599   b:   49 89 c8                mov    %rcx,%r8
600   e:   48 89 d1                mov    %rdx,%rcx
601  11:   48 89 f2                mov    %rsi,%rdx
602  14:   48 89 fe                mov    %rdi,%rsi
603  17:   48 8b 3d 12 00 00 00    mov    18(%rip),%rdi        # 30 <ccall_adjustor+0x30>
604  1e:   ff 25 14 00 00 00       jmpq   *20(%rip)        # 38 <ccall_adjustor+0x38>
605  ...
606  28: .quad 0  # aligned on 8-byte boundary
607  30: .quad 0  # aligned on 8-byte boundary
608  38: .quad 0  # aligned on 8-byte boundary
609    */
610
611    { 
612        int i = 0;
613        char *c;
614        StgWord8 *adj_code;
615
616        // determine whether we have 6 or more integer arguments,
617        // and therefore need to flush one to the stack.
618        for (c = typeString; *c != '\0'; c++) {
619            if (*c != 'f' && *c != 'd') i++;
620            if (i == 6) break;
621        }
622
623        if (i < 6) {
624            adjustor = allocateExec(0x30,&code);
625            adj_code = (StgWord8*)adjustor;
626
627            *(StgInt32 *)adj_code        = 0x49c1894d;
628            *(StgInt32 *)(adj_code+0x4)  = 0x8948c889;
629            *(StgInt32 *)(adj_code+0x8)  = 0xf28948d1;
630            *(StgInt32 *)(adj_code+0xc)  = 0x48fe8948;
631            *(StgInt32 *)(adj_code+0x10) = 0x000a3d8b;
632            *(StgInt32 *)(adj_code+0x14) = 0x25ff0000;
633            *(StgInt32 *)(adj_code+0x18) = 0x0000000c;
634            *(StgInt64 *)(adj_code+0x20) = (StgInt64)hptr;
635            *(StgInt64 *)(adj_code+0x28) = (StgInt64)wptr;
636        }
637        else
638        {
639            adjustor = allocateExec(0x40,&code);
640            adj_code = (StgWord8*)adjustor;
641
642            *(StgInt32 *)adj_code        = 0x35ff5141;
643            *(StgInt32 *)(adj_code+0x4)  = 0x00000020;
644            *(StgInt32 *)(adj_code+0x8)  = 0x49c1894d;
645            *(StgInt32 *)(adj_code+0xc)  = 0x8948c889;
646            *(StgInt32 *)(adj_code+0x10) = 0xf28948d1;
647            *(StgInt32 *)(adj_code+0x14) = 0x48fe8948;
648            *(StgInt32 *)(adj_code+0x18) = 0x00123d8b;
649            *(StgInt32 *)(adj_code+0x1c) = 0x25ff0000;
650            *(StgInt32 *)(adj_code+0x20) = 0x00000014;
651           
652            *(StgInt64 *)(adj_code+0x28) = (StgInt64)obscure_ccall_ret_code;
653            *(StgInt64 *)(adj_code+0x30) = (StgInt64)hptr;
654            *(StgInt64 *)(adj_code+0x38) = (StgInt64)wptr;
655        }
656    }
657# endif
658
659
660#elif defined(sparc_HOST_ARCH)
661  /* Magic constant computed by inspecting the code length of the following
662     assembly language snippet (offset and machine code prefixed):
663
664     <00>: 9C23A008   sub   %sp, 8, %sp         ! make room for %o4/%o5 in caller's frame
665     <04>: DA23A060   st    %o5, [%sp + 96]     ! shift registers by 2 positions
666     <08>: D823A05C   st    %o4, [%sp + 92]
667     <0C>: 9A10000B   mov   %o3, %o5
668     <10>: 9810000A   mov   %o2, %o4
669     <14>: 96100009   mov   %o1, %o3
670     <18>: 94100008   mov   %o0, %o2
671     <1C>: 13000000   sethi %hi(wptr), %o1      ! load up wptr (1 of 2)
672     <20>: 11000000   sethi %hi(hptr), %o0      ! load up hptr (1 of 2)
673     <24>: 81C26000   jmp   %o1 + %lo(wptr)     ! jump to wptr (load 2 of 2)
674     <28>: 90122000   or    %o0, %lo(hptr), %o0 ! load up hptr (2 of 2, delay slot)
675     <2C>  00000000                             ! place for getting hptr back easily
676
677     ccall'ing on SPARC is easy, because we are quite lucky to push a
678     multiple of 8 bytes (1 word hptr + 1 word dummy arg) in front of the
679     existing arguments (note that %sp must stay double-word aligned at
680     all times, see ABI spec at http://www.sparc.org/standards/psABI3rd.pdf).
681     To do this, we extend the *caller's* stack frame by 2 words and shift
682     the output registers used for argument passing (%o0 - %o5, we are a *leaf*
683     procedure because of the tail-jump) by 2 positions. This makes room in
684     %o0 and %o1 for the additinal arguments, namely  hptr and a dummy (used
685     for destination addr of jump on SPARC, return address on x86, ...). This
686     shouldn't cause any problems for a C-like caller: alloca is implemented
687     similarly, and local variables should be accessed via %fp, not %sp. In a
688     nutshell: This should work! (Famous last words! :-)
689  */
690    adjustor = allocateExec(4*(11+1),&code);
691    {
692        unsigned long *const adj_code = (unsigned long *)adjustor;
693
694        adj_code[ 0]  = 0x9C23A008UL;   /* sub   %sp, 8, %sp         */
695        adj_code[ 1]  = 0xDA23A060UL;   /* st    %o5, [%sp + 96]     */
696        adj_code[ 2]  = 0xD823A05CUL;   /* st    %o4, [%sp + 92]     */
697        adj_code[ 3]  = 0x9A10000BUL;   /* mov   %o3, %o5            */
698        adj_code[ 4]  = 0x9810000AUL;   /* mov   %o2, %o4            */
699        adj_code[ 5]  = 0x96100009UL;   /* mov   %o1, %o3            */
700        adj_code[ 6]  = 0x94100008UL;   /* mov   %o0, %o2            */
701        adj_code[ 7]  = 0x13000000UL;   /* sethi %hi(wptr), %o1      */
702        adj_code[ 7] |= ((unsigned long)wptr) >> 10;
703        adj_code[ 8]  = 0x11000000UL;   /* sethi %hi(hptr), %o0      */
704        adj_code[ 8] |= ((unsigned long)hptr) >> 10;
705        adj_code[ 9]  = 0x81C26000UL;   /* jmp   %o1 + %lo(wptr)     */
706        adj_code[ 9] |= ((unsigned long)wptr) & 0x000003FFUL;
707        adj_code[10]  = 0x90122000UL;   /* or    %o0, %lo(hptr), %o0 */
708        adj_code[10] |= ((unsigned long)hptr) & 0x000003FFUL;
709
710        adj_code[11]  = (unsigned long)hptr;
711
712        /* flush cache */
713        asm("flush %0" : : "r" (adj_code     ));
714        asm("flush %0" : : "r" (adj_code +  2));
715        asm("flush %0" : : "r" (adj_code +  4));
716        asm("flush %0" : : "r" (adj_code +  6));
717        asm("flush %0" : : "r" (adj_code + 10));
718
719        /* max. 5 instructions latency, and we need at >= 1 for returning */
720        asm("nop");
721        asm("nop");
722        asm("nop");
723        asm("nop");
724    }
725#elif defined(alpha_HOST_ARCH)
726  /* Magic constant computed by inspecting the code length of
727     the following assembly language snippet
728     (offset and machine code prefixed; note that the machine code
729     shown is longwords stored in little-endian order):
730
731  <00>: 46520414        mov     a2, a4
732  <04>: 46100412        mov     a0, a2
733  <08>: a61b0020        ldq     a0, 0x20(pv)    # load up hptr
734  <0c>: 46730415        mov     a3, a5
735  <10>: a77b0028        ldq     pv, 0x28(pv)    # load up wptr
736  <14>: 46310413        mov     a1, a3
737  <18>: 6bfb----        jmp     (pv), <hint>    # jump to wptr (with hint)
738  <1c>: 00000000                                # padding for alignment
739  <20>: [8 bytes for hptr quadword]
740  <28>: [8 bytes for wptr quadword]
741
742     The "computed" jump at <08> above is really a jump to a fixed
743     location.  Accordingly, we place an always-correct hint in the
744     jump instruction, namely the address offset from <0c> to wptr,
745     divided by 4, taking the lowest 14 bits.
746
747     We only support passing 4 or fewer argument words, for the same
748     reason described under sparc_HOST_ARCH above by JRS, 21 Aug 01.
749     On the Alpha the first 6 integer arguments are in a0 through a5,
750     and the rest on the stack.  Hence we want to shuffle the original
751     caller's arguments by two.
752
753     On the Alpha the calling convention is so complex and dependent
754     on the callee's signature -- for example, the stack pointer has
755     to be a multiple of 16 -- that it seems impossible to me [ccshan]
756     to handle the general case correctly without changing how the
757     adjustor is called from C.  For now, our solution of shuffling
758     registers only and ignoring the stack only works if the original
759     caller passed 4 or fewer argument words.
760
761TODO: Depending on how much allocation overhead stgMallocBytes uses for
762      header information (more precisely, if the overhead is no more than
763      4 bytes), we should move the first three instructions above down by
764      4 bytes (getting rid of the nop), hence saving memory. [ccshan]
765  */
766    ASSERT(((StgWord64)wptr & 3) == 0);
767    adjustor = allocateExec(48,&code);
768    {
769        StgWord64 *const code = (StgWord64 *)adjustor;
770
771        code[0] = 0x4610041246520414L;
772        code[1] = 0x46730415a61b0020L;
773        code[2] = 0x46310413a77b0028L;
774        code[3] = 0x000000006bfb0000L
775                | (((StgWord32*)(wptr) - (StgWord32*)(code) - 3) & 0x3fff);
776
777        code[4] = (StgWord64)hptr;
778        code[5] = (StgWord64)wptr;
779
780        /* Ensure that instruction cache is consistent with our new code */
781        __asm__ volatile("call_pal %0" : : "i" (PAL_imb));
782    }
783#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
784
785#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
786#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
787    {
788        /* The PowerPC Linux (32-bit) calling convention is annoyingly complex.
789           We need to calculate all the details of the stack frame layout,
790           taking into account the types of all the arguments, and then
791           generate code on the fly. */
792   
793        int src_gpr = 3, dst_gpr = 5;
794        int fpr = 3;
795        int src_offset = 0, dst_offset = 0;
796        int n = strlen(typeString),i;
797        int src_locs[n], dst_locs[n];
798        int frameSize;
799        unsigned *code;
800     
801            /* Step 1:
802               Calculate where the arguments should go.
803               src_locs[] will contain the locations of the arguments in the
804               original stack frame passed to the adjustor.
805               dst_locs[] will contain the locations of the arguments after the
806               adjustor runs, on entry to the wrapper proc pointed to by wptr.
807
808               This algorithm is based on the one described on page 3-19 of the
809               System V ABI PowerPC Processor Supplement.
810            */
811        for(i=0;typeString[i];i++)
812        {
813            char t = typeString[i];
814            if((t == 'f' || t == 'd') && fpr <= 8)
815                src_locs[i] = dst_locs[i] = -32-(fpr++);
816            else
817            {
818                if((t == 'l' || t == 'L') && src_gpr <= 9)
819                {
820                    if((src_gpr & 1) == 0)
821                        src_gpr++;
822                    src_locs[i] = -src_gpr;
823                    src_gpr += 2;
824                }
825                else if((t == 'w' || t == 'W') && src_gpr <= 10)
826                {
827                    src_locs[i] = -(src_gpr++);
828                }
829                else
830                {
831                    if(t == 'l' || t == 'L' || t == 'd')
832                    {
833                        if(src_offset % 8)
834                            src_offset += 4;
835                    }
836                    src_locs[i] = src_offset;
837                    src_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
838                }
839
840                    if((t == 'l' || t == 'L') && dst_gpr <= 9)
841                {
842                    if((dst_gpr & 1) == 0)
843                        dst_gpr++;
844                    dst_locs[i] = -dst_gpr;
845                    dst_gpr += 2;
846                }
847                else if((t == 'w' || t == 'W') && dst_gpr <= 10)
848                {
849                    dst_locs[i] = -(dst_gpr++);
850                }
851                else
852                {
853                    if(t == 'l' || t == 'L' || t == 'd')
854                    {
855                        if(dst_offset % 8)
856                            dst_offset += 4;
857                    }
858                    dst_locs[i] = dst_offset;
859                    dst_offset += (t == 'l' || t == 'L' || t == 'd') ? 8 : 4;
860                }
861            }
862        }
863
864        frameSize = dst_offset + 8;
865        frameSize = (frameSize+15) & ~0xF;
866
867            /* Step 2:
868               Build the adjustor.
869            */
870                    // allocate space for at most 4 insns per parameter
871                    // plus 14 more instructions.
872        adjustor = allocateExec(4 * (4*n + 14),&code);
873        code = (unsigned*)adjustor;
874       
875        *code++ = 0x48000008; // b *+8
876            // * Put the hptr in a place where freeHaskellFunctionPtr
877            //   can get at it.
878        *code++ = (unsigned) hptr;
879
880            // * save the link register
881        *code++ = 0x7c0802a6; // mflr r0;
882        *code++ = 0x90010004; // stw r0, 4(r1);
883            // * and build a new stack frame
884        *code++ = OP_LO(0x9421, -frameSize); // stwu r1, -frameSize(r1)
885
886            // * now generate instructions to copy arguments
887            //   from the old stack frame into the new stack frame.
888        for(i=n-1;i>=0;i--)
889        {
890            if(src_locs[i] < -32)
891                ASSERT(dst_locs[i] == src_locs[i]);
892            else if(src_locs[i] < 0)
893            {
894                // source in GPR.
895                ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
896                if(dst_locs[i] < 0)
897                {
898                    ASSERT(dst_locs[i] > -32);
899                        // dst is in GPR, too.
900
901                    if(typeString[i] == 'l' || typeString[i] == 'L')
902                    {
903                            // mr dst+1, src+1
904                        *code++ = 0x7c000378
905                                | ((-dst_locs[i]+1) << 16)
906                                | ((-src_locs[i]+1) << 11)
907                                | ((-src_locs[i]+1) << 21);
908                    }
909                    // mr dst, src
910                    *code++ = 0x7c000378
911                            | ((-dst_locs[i]) << 16)
912                            | ((-src_locs[i]) << 11)
913                            | ((-src_locs[i]) << 21);
914                }
915                else
916                {
917                    if(typeString[i] == 'l' || typeString[i] == 'L')
918                    {
919                            // stw src+1, dst_offset+4(r1)
920                        *code++ = 0x90010000
921                                | ((-src_locs[i]+1) << 21)
922                                | (dst_locs[i] + 4);
923                    }
924                   
925                        // stw src, dst_offset(r1)
926                    *code++ = 0x90010000
927                            | ((-src_locs[i]) << 21)
928                            | (dst_locs[i] + 8);
929                }
930            }
931            else
932            {
933                ASSERT(dst_locs[i] >= 0);
934                ASSERT(typeString[i] != 'f' && typeString[i] != 'd');
935
936                if(typeString[i] == 'l' || typeString[i] == 'L')
937                {
938                    // lwz r0, src_offset(r1)
939                        *code++ = 0x80010000
940                                | (src_locs[i] + frameSize + 8 + 4);
941                    // stw r0, dst_offset(r1)
942                        *code++ = 0x90010000
943                                | (dst_locs[i] + 8 + 4);
944                    }
945                // lwz r0, src_offset(r1)
946                    *code++ = 0x80010000
947                            | (src_locs[i] + frameSize + 8);
948                // stw r0, dst_offset(r1)
949                    *code++ = 0x90010000
950                            | (dst_locs[i] + 8);
951           }
952        }
953
954            // * hptr will be the new first argument.
955            // lis r3, hi(hptr)
956        *code++ = OP_HI(0x3c60, hptr);
957            // ori r3,r3,lo(hptr)
958        *code++ = OP_LO(0x6063, hptr);
959
960            // * we need to return to a piece of code
961            //   which will tear down the stack frame.
962            // lis r11,hi(obscure_ccall_ret_code)
963        *code++ = OP_HI(0x3d60, obscure_ccall_ret_code);
964            // ori r11,r11,lo(obscure_ccall_ret_code)
965        *code++ = OP_LO(0x616b, obscure_ccall_ret_code);
966            // mtlr r11
967        *code++ = 0x7d6803a6;
968
969            // * jump to wptr
970            // lis r11,hi(wptr)
971        *code++ = OP_HI(0x3d60, wptr);
972            // ori r11,r11,lo(wptr)
973        *code++ = OP_LO(0x616b, wptr);
974            // mtctr r11
975        *code++ = 0x7d6903a6;
976            // bctr
977        *code++ = 0x4e800420;
978
979        // Flush the Instruction cache:
980        {
981            unsigned *p = adjustor;
982            while(p < code)
983            {
984                __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
985                                 : : "r" (p));
986                p++;
987            }
988            __asm__ volatile ("sync\n\tisync");
989        }
990    }
991
992#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
993       
994#define OP_LO(op,lo)  ((((unsigned)(op)) << 16) | (((unsigned)(lo)) & 0xFFFF))
995#define OP_HI(op,hi)  ((((unsigned)(op)) << 16) | (((unsigned)(hi)) >> 16))
996    {
997        /* The following code applies to all PowerPC and PowerPC64 platforms
998           whose stack layout is based on the AIX ABI.
999
1000           Besides (obviously) AIX, this includes
1001            Mac OS 9 and BeOS/PPC (may they rest in peace),
1002                which use the 32-bit AIX ABI
1003            powerpc64-linux,
1004                which uses the 64-bit AIX ABI
1005            and Darwin (Mac OS X),
1006                which uses the same stack layout as AIX,
1007                but no function descriptors.
1008
1009           The actual stack-frame shuffling is implemented out-of-line
1010           in the function adjustorCode, in AdjustorAsm.S.
1011           Here, we set up an AdjustorStub structure, which
1012           is a function descriptor (on platforms that have function
1013           descriptors) or a short piece of stub code (on Darwin) to call
1014           adjustorCode with a pointer to the AdjustorStub struct loaded
1015           into register r2.
1016
1017           One nice thing about this is that there is _no_ code generated at
1018           runtime on the platforms that have function descriptors.
1019        */
1020        AdjustorStub *adjustorStub;
1021        int sz = 0, extra_sz, total_sz;
1022
1023#ifdef FUNDESCS
1024        adjustorStub = stgMallocBytes(sizeof(AdjustorStub), "createAdjustor");
1025#else
1026        adjustorStub = allocateExec(sizeof(AdjustorStub),&code);
1027#endif
1028        adjustor = adjustorStub;
1029           
1030        adjustorStub->code = (void*) &adjustorCode;
1031
1032#ifdef FUNDESCS
1033            // function descriptors are a cool idea.
1034            // We don't need to generate any code at runtime.
1035        adjustorStub->toc = adjustorStub;
1036#else
1037
1038            // no function descriptors :-(
1039            // We need to do things "by hand".
1040#if defined(powerpc_HOST_ARCH)
1041            // lis  r2, hi(adjustorStub)
1042        adjustorStub->lis = OP_HI(0x3c40, adjustorStub);
1043            // ori  r2, r2, lo(adjustorStub)
1044        adjustorStub->ori = OP_LO(0x6042, adjustorStub);
1045            // lwz r0, code(r2)
1046        adjustorStub->lwz = OP_LO(0x8002, (char*)(&adjustorStub->code)
1047                                        - (char*)adjustorStub);
1048            // mtctr r0
1049        adjustorStub->mtctr = 0x7c0903a6;
1050            // bctr
1051        adjustorStub->bctr = 0x4e800420;
1052#else
1053        barf("adjustor creation not supported on this platform");
1054#endif
1055
1056        // Flush the Instruction cache:
1057        {
1058            int n = sizeof(AdjustorStub)/sizeof(unsigned);
1059            unsigned *p = (unsigned*)adjustor;
1060            while(n--)
1061            {
1062                __asm__ volatile ("dcbf 0,%0\n\tsync\n\ticbi 0,%0"
1063                                    : : "r" (p));
1064                p++;
1065            }
1066            __asm__ volatile ("sync\n\tisync");
1067        }
1068#endif
1069
1070            // Calculate the size of the stack frame, in words.
1071        sz = totalArgumentSize(typeString);
1072       
1073            // The first eight words of the parameter area
1074            // are just "backing store" for the parameters passed in
1075            // the GPRs. extra_sz is the number of words beyond those first
1076            // 8 words.
1077        extra_sz = sz - 8;
1078        if(extra_sz < 0)
1079            extra_sz = 0;
1080
1081            // Calculate the total size of the stack frame.
1082        total_sz = (6 /* linkage area */
1083                  + 8 /* minimum parameter area */
1084                  + 2 /* two extra arguments */
1085                  + extra_sz)*sizeof(StgWord);
1086       
1087            // align to 16 bytes.
1088            // AIX only requires 8 bytes, but who cares?
1089        total_sz = (total_sz+15) & ~0xF;
1090       
1091            // Fill in the information that adjustorCode in AdjustorAsm.S
1092            // will use to create a new stack frame with the additional args.
1093        adjustorStub->hptr = hptr;
1094        adjustorStub->wptr = wptr;
1095        adjustorStub->negative_framesize = -total_sz;
1096        adjustorStub->extrawords_plus_one = extra_sz + 1;
1097    }
1098
1099#elif defined(ia64_HOST_ARCH)
1100/*
1101    Up to 8 inputs are passed in registers.  We flush the last two inputs to
1102    the stack, initially into the 16-byte scratch region left by the caller.
1103    We then shuffle the others along by 4 (taking 2 registers for ourselves
1104    to save return address and previous function state - we need to come back
1105    here on the way out to restore the stack, so this is a real function
1106    rather than just a trampoline).
1107   
1108    The function descriptor we create contains the gp of the target function
1109    so gp is already loaded correctly.
1110
1111        [MLX]       alloc r16=ar.pfs,10,2,0
1112                    movl r17=wptr
1113        [MII]       st8.spill [r12]=r38,8               // spill in6 (out4)
1114                    mov r41=r37                         // out7 = in5 (out3)
1115                    mov r40=r36;;                       // out6 = in4 (out2)
1116        [MII]       st8.spill [r12]=r39                 // spill in7 (out5)
1117                    mov.sptk b6=r17,50
1118                    mov r38=r34;;                       // out4 = in2 (out0)
1119        [MII]       mov r39=r35                         // out5 = in3 (out1)
1120                    mov r37=r33                         // out3 = in1 (loc1)
1121                    mov r36=r32                         // out2 = in0 (loc0)
1122        [MLX]       adds r12=-24,r12                    // update sp
1123                    movl r34=hptr;;                     // out0 = hptr
1124        [MIB]       mov r33=r16                         // loc1 = ar.pfs
1125                    mov r32=b0                          // loc0 = retaddr
1126                    br.call.sptk.many b0=b6;;
1127
1128        [MII]       adds r12=-16,r12
1129                    mov b0=r32
1130                    mov.i ar.pfs=r33
1131        [MFB]       nop.m 0x0
1132                    nop.f 0x0
1133                    br.ret.sptk.many b0;;
1134*/
1135
1136/* These macros distribute a long constant into the two words of an MLX bundle */
1137#define BITS(val,start,count)   (((val) >> (start)) & ((1 << (count))-1))
1138#define MOVL_LOWORD(val)        (BITS(val,22,18) << 46)
1139#define MOVL_HIWORD(val)        ( (BITS(val,0,7)    << 36)      \
1140                                | (BITS(val,7,9)    << 50)      \
1141                                | (BITS(val,16,5)   << 45)      \
1142                                | (BITS(val,21,1)   << 44)      \
1143                                | (BITS(val,40,23))             \
1144                                | (BITS(val,63,1)    << 59))
1145
1146    {
1147        StgStablePtr stable;
1148        IA64FunDesc *wdesc = (IA64FunDesc *)wptr;
1149        StgWord64 wcode = wdesc->ip;
1150        IA64FunDesc *fdesc;
1151        StgWord64 *code;
1152
1153        /* we allocate on the Haskell heap since malloc'd memory isn't
1154         * executable - argh */
1155        /* Allocated memory is word-aligned (8 bytes) but functions on ia64
1156         * must be aligned to 16 bytes.  We allocate an extra 8 bytes of
1157         * wiggle room so that we can put the code on a 16 byte boundary. */
1158        adjustor = stgAllocStable(sizeof(IA64FunDesc)+18*8+8, &stable);
1159
1160        fdesc = (IA64FunDesc *)adjustor;
1161        code = (StgWord64 *)(fdesc + 1);
1162        /* add 8 bytes to code if needed to align to a 16-byte boundary */
1163        if ((StgWord64)code & 15) code++;
1164        fdesc->ip = (StgWord64)code;
1165        fdesc->gp = wdesc->gp;
1166
1167        code[0]  = 0x0000058004288004 | MOVL_LOWORD(wcode);
1168        code[1]  = 0x6000000220000000 | MOVL_HIWORD(wcode);
1169        code[2]  = 0x029015d818984001;
1170        code[3]  = 0x8401200500420094;
1171        code[4]  = 0x886011d8189c0001;
1172        code[5]  = 0x84011004c00380c0;
1173        code[6]  = 0x0250210046013800;
1174        code[7]  = 0x8401000480420084;
1175        code[8]  = 0x0000233f19a06005 | MOVL_LOWORD((StgWord64)hptr);
1176        code[9]  = 0x6000000440000000 | MOVL_HIWORD((StgWord64)hptr);
1177        code[10] = 0x0200210020010811;
1178        code[11] = 0x1080006800006200;
1179        code[12] = 0x0000210018406000;
1180        code[13] = 0x00aa021000038005;
1181        code[14] = 0x000000010000001d;
1182        code[15] = 0x0084000880000200;
1183
1184        /* save stable pointers in convenient form */
1185        code[16] = (StgWord64)hptr;
1186        code[17] = (StgWord64)stable;
1187    }
1188#else
1189    barf("adjustor creation not supported on this platform");
1190#endif
1191    break;
1192 
1193  default:
1194    ASSERT(0);
1195    break;
1196  }
1197
1198  /* Have fun! */
1199  return code;
1200}
1201
1202
1203void
1204freeHaskellFunctionPtr(void* ptr)
1205{
1206#if defined(i386_HOST_ARCH)
1207 if ( *(unsigned char*)ptr != 0xe8 &&
1208      *(unsigned char*)ptr != 0x58 ) {
1209   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1210   return;
1211 }
1212 if (*(unsigned char*)ptr == 0xe8) { /* Aha, a ccall adjustor! */
1213     freeStablePtr(((AdjustorStub*)ptr)->hptr);
1214 } else {
1215    freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x02)));
1216 }
1217#elif defined(x86_64_HOST_ARCH)
1218 if ( *(StgWord16 *)ptr == 0x894d ) {
1219     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+
1220#if defined(mingw32_HOST_OS)
1221                                                   0x28
1222#else
1223                                                   0x20
1224#endif
1225                                                       ));
1226#if !defined(mingw32_HOST_OS)
1227 } else if ( *(StgWord16 *)ptr == 0x5141 ) {
1228     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x30));
1229#endif
1230#if defined(mingw32_HOST_OS)
1231 } else if ( *(StgWord16 *)ptr == 0x8348 ) {
1232     freeStablePtr(*(StgStablePtr*)((StgWord8*)ptr+0x48));
1233#endif
1234 } else {
1235   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1236   return;
1237 }
1238#elif defined(sparc_HOST_ARCH)
1239 if ( *(unsigned long*)ptr != 0x9C23A008UL ) {
1240   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1241   return;
1242 }
1243
1244 /* Free the stable pointer first..*/
1245 freeStablePtr(*((StgStablePtr*)((unsigned long*)ptr + 11)));
1246#elif defined(alpha_HOST_ARCH)
1247 if ( *(StgWord64*)ptr != 0xa77b0018a61b0010L ) {
1248   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1249   return;
1250 }
1251
1252 /* Free the stable pointer first..*/
1253 freeStablePtr(*((StgStablePtr*)((unsigned char*)ptr + 0x10)));
1254#elif defined(powerpc_HOST_ARCH) && defined(linux_HOST_OS)
1255 if ( *(StgWord*)ptr != 0x48000008 ) {
1256   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1257   return;
1258 }
1259 freeStablePtr(((StgStablePtr*)ptr)[1]);
1260#elif defined(powerpc_HOST_ARCH) || defined(powerpc64_HOST_ARCH)
1261 if ( ((AdjustorStub*)ptr)->code != (StgFunPtr) &adjustorCode ) {
1262   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1263   return;
1264 }
1265 freeStablePtr(((AdjustorStub*)ptr)->hptr);
1266#elif defined(ia64_HOST_ARCH)
1267 IA64FunDesc *fdesc = (IA64FunDesc *)ptr;
1268 StgWord64 *code = (StgWord64 *)(fdesc+1);
1269
1270 if (fdesc->ip != (StgWord64)code) {
1271   errorBelch("freeHaskellFunctionPtr: not for me, guv! %p\n", ptr);
1272   return;
1273 }
1274 freeStablePtr((StgStablePtr)code[16]);
1275 freeStablePtr((StgStablePtr)code[17]);
1276 return;
1277#else
1278 ASSERT(0);
1279#endif
1280 // Can't write to this memory, it is only executable:
1281 // *((unsigned char*)ptr) = '\0';
1282
1283 freeExec(ptr);
1284}
1285
1286#endif // !USE_LIBFFI_FOR_ADJUSTORS
Note: See TracBrowser for help on using the browser.