1/* -----------------------------------------------------------------------
2   ffi.c - (c) 2003-2004 Randolph Chung <tausq@debian.org>
3           (c) 2008 Red Hat, Inc.
4
5   HPPA Foreign Function Interface
6   HP-UX PA ABI support (c) 2006 Free Software Foundation, Inc.
7
8   Permission is hereby granted, free of charge, to any person obtaining
9   a copy of this software and associated documentation files (the
10   ``Software''), to deal in the Software without restriction, including
11   without limitation the rights to use, copy, modify, merge, publish,
12   distribute, sublicense, and/or sell copies of the Software, and to
13   permit persons to whom the Software is furnished to do so, subject to
14   the following conditions:
15
16   The above copyright notice and this permission notice shall be included
17   in all copies or substantial portions of the Software.
18
19   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
20   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
23   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
24   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26   DEALINGS IN THE SOFTWARE.
27   ----------------------------------------------------------------------- */
28
29#include <ffi.h>
30#include <ffi_common.h>
31
32#include <stdlib.h>
33#include <stdio.h>
34
/* Round V up to the next multiple of A.  The mask arithmetic is only
   correct when A is a power of two.  */
#define ROUND_UP(v, a)  (((size_t)(v) + (a) - 1) & ~((a) - 1))

/* Smallest argument-area size, in bytes, that ffi_size_stack_pa32
   assigns to a cif.  */
#define MIN_STACK_SIZE  64
/* Word offset, counted downward from the stack pointer handed to the
   marshalling code, of argument word 0 (9 words == 36 bytes).  */
#define FIRST_ARG_SLOT  9
/* debug() messages whose level is greater than this are suppressed.  */
#define DEBUG_LEVEL   0

/* Load the 32-bit word at ADDR into the left half of FP register FPREG.
   FPREG is listed as clobbered so the compiler knows it changed.  */
#define fldw(addr, fpreg) \
  __asm__ volatile ("fldw 0(%0), %%" #fpreg "L" : : "r"(addr) : #fpreg)
/* Store the left half of FP register FPREG to the word at ADDR.
   No "memory" clobber is declared for the store.  */
#define fstw(fpreg, addr) \
  __asm__ volatile ("fstw %%" #fpreg "L, 0(%0)" : : "r"(addr))
/* Load the 64-bit doubleword at ADDR into FP register FPREG.  */
#define fldd(addr, fpreg) \
  __asm__ volatile ("fldd 0(%0), %%" #fpreg : : "r"(addr) : #fpreg)
/* Store FP register FPREG to the doubleword at ADDR.
   NOTE(review): the register is named with an "L" suffix here although
   fldd names the full register -- confirm this is intended for a
   doubleword store.  No "memory" clobber is declared for the store.  */
#define fstd(fpreg, addr) \
  __asm__ volatile ("fstd %%" #fpreg "L, 0(%0)" : : "r"(addr))

/* printf the remaining arguments when LVL does not exceed DEBUG_LEVEL.
   Uses the GNU named-variadic macro syntax.  */
#define debug(lvl, x...) do { if (lvl <= DEBUG_LEVEL) { printf(x); } } while (0)
51
52static inline int ffi_struct_type(ffi_type *t)
53{
54  size_t sz = t->size;
55
56  /* Small structure results are passed in registers,
57     larger ones are passed by pointer.  Note that
58     small structures of size 2, 4 and 8 differ from
59     the corresponding integer types in that they have
60     different alignment requirements.  */
61
62  if (sz <= 1)
63    return FFI_TYPE_UINT8;
64  else if (sz == 2)
65    return FFI_TYPE_SMALL_STRUCT2;
66  else if (sz == 3)
67    return FFI_TYPE_SMALL_STRUCT3;
68  else if (sz == 4)
69    return FFI_TYPE_SMALL_STRUCT4;
70  else if (sz == 5)
71    return FFI_TYPE_SMALL_STRUCT5;
72  else if (sz == 6)
73    return FFI_TYPE_SMALL_STRUCT6;
74  else if (sz == 7)
75    return FFI_TYPE_SMALL_STRUCT7;
76  else if (sz <= 8)
77    return FFI_TYPE_SMALL_STRUCT8;
78  else
79    return FFI_TYPE_STRUCT; /* else, we pass it by pointer.  */
80}
81
82/* PA has a downward growing stack, which looks like this:
83
84   Offset
85	[ Variable args ]
   SP-(4*(n+9))         arg word N
87   ...
88   SP-52                arg word 4
89	[ Fixed args ]
90   SP-48                arg word 3
91   SP-44                arg word 2
92   SP-40                arg word 1
93   SP-36                arg word 0
94	[ Frame marker ]
95   ...
96   SP-20                RP
97   SP-4                 previous SP
98
99   The first four argument words on the stack are reserved for use by
100   the callee.  Instead, the general and floating registers replace
101   the first four argument slots.  Non FP arguments are passed solely
102   in the general registers.  FP arguments are passed in both general
103   and floating registers when using libffi.
104
105   Non-FP 32-bit args are passed in gr26, gr25, gr24 and gr23.
106   Non-FP 64-bit args are passed in register pairs, starting
107   on an odd numbered register (i.e. r25+r26 and r23+r24).
108   FP 32-bit arguments are passed in fr4L, fr5L, fr6L and fr7L.
109   FP 64-bit arguments are passed in fr5 and fr7.
110
111   The registers are allocated in the same manner as stack slots.
112   This allows the callee to save its arguments on the stack if
113   necessary:
114
115   arg word 3 -> gr23 or fr7L
116   arg word 2 -> gr24 or fr6L or fr7R
117   arg word 1 -> gr25 or fr5L
118   arg word 0 -> gr26 or fr4L or fr5R
119
120   Note that fr4R and fr6R are never used for arguments (i.e.,
121   doubles are not passed in fr4 or fr6).
122
123   The rest of the arguments are passed on the stack starting at SP-52,
124   but 64-bit arguments need to be aligned to an 8-byte boundary
125
126   This means we can have holes either in the register allocation,
127   or in the stack.  */
128
/* ffi_prep_args is called by the assembly routine once stack space
   has been allocated for the function's arguments.

   The following code will put everything into the stack frame
   (which was allocated by the asm routine), and on return
   the asm routine will load the arguments that should be
   passed by register into the appropriate registers.

   STACK is indexed downward in 32-bit words: argument word N is
   written at stack[-(FIRST_ARG_SLOT + N)].  ECIF supplies the argument
   types (ecif->cif->arg_types, ecif->cif->nargs) and the pointers to
   the argument values (ecif->avalue).  BYTES is only used for the
   debug dump and for the final sanity assertion.

   NOTE: We load floating point args in this function... that means we
   assume gcc will not mess with fp regs in here.  */

void ffi_prep_args_pa32(UINT32 *stack, extended_cif *ecif, unsigned bytes)
{
  register unsigned int i;
  register ffi_type **p_arg;
  register void **p_argv;
  /* Word offset below STACK of the argument word being filled.  */
  unsigned int slot = FIRST_ARG_SLOT;
  char *dest_cpy;
  size_t len;

  debug(1, "%s: stack = %p, ecif = %p, bytes = %u\n", __FUNCTION__, stack,
	ecif, bytes);

  p_arg = ecif->cif->arg_types;
  p_argv = ecif->avalue;

  for (i = 0; i < ecif->cif->nargs; i++)
    {
      int type = (*p_arg)->type;

      switch (type)
	{
	/* Integers narrower than a word are sign- or zero-extended to
	   fill the whole 32-bit argument word.  */
	case FFI_TYPE_SINT8:
	  *(SINT32 *)(stack - slot) = *(SINT8 *)(*p_argv);
	  break;

	case FFI_TYPE_UINT8:
	  *(UINT32 *)(stack - slot) = *(UINT8 *)(*p_argv);
	  break;

	case FFI_TYPE_SINT16:
	  *(SINT32 *)(stack - slot) = *(SINT16 *)(*p_argv);
	  break;

	case FFI_TYPE_UINT16:
	  *(UINT32 *)(stack - slot) = *(UINT16 *)(*p_argv);
	  break;

	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_POINTER:
	  debug(3, "Storing UINT32 %u in slot %u\n", *(UINT32 *)(*p_argv),
		slot);
	  *(UINT32 *)(stack - slot) = *(UINT32 *)(*p_argv);
	  break;

	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	  /* Align slot for 64-bit type.  The adjustment always lands on
	     an even slot; the value then occupies words slot and
	     slot - 1, and the slot++ at the bottom of the loop steps
	     past it.  */
	  slot += (slot & 1) ? 1 : 2;
	  *(UINT64 *)(stack - slot) = *(UINT64 *)(*p_argv);
	  break;

	case FFI_TYPE_FLOAT:
	  /* The value is always written to its stack word; when it
	     falls in one of the first four argument words it is also
	     loaded into the matching FP register.  */
	  debug(3, "Storing UINT32(float) in slot %u\n", slot);
	  *(UINT32 *)(stack - slot) = *(UINT32 *)(*p_argv);
	  switch (slot - FIRST_ARG_SLOT)
	    {
	    /* First 4 args go in fr4L - fr7L.  */
	    case 0: fldw(stack - slot, fr4); break;
	    case 1: fldw(stack - slot, fr5); break;
	    case 2: fldw(stack - slot, fr6); break;
	    case 3: fldw(stack - slot, fr7); break;
	    }
	  break;

	case FFI_TYPE_DOUBLE:
	  /* Align slot for 64-bit type.  */
	  slot += (slot & 1) ? 1 : 2;
	  debug(3, "Storing UINT64(double) at slot %u\n", slot);
	  *(UINT64 *)(stack - slot) = *(UINT64 *)(*p_argv);
	  switch (slot - FIRST_ARG_SLOT)
	    {
	      /* First 2 args go in fr5, fr7.  After alignment the slot
		 offset is always odd, so only 1 and 3 can match.  */
	      case 1: fldd(stack - slot, fr5); break;
	      case 3: fldd(stack - slot, fr7); break;
	    }
	  break;

#ifdef PA_HPUX
	case FFI_TYPE_LONGDOUBLE:
	  /* Long doubles are passed in the same manner as structures
	     larger than 8 bytes: the argument word holds the address of
	     the caller's value.  */
	  *(UINT32 *)(stack - slot) = (UINT32)(*p_argv);
	  break;
#endif

	case FFI_TYPE_STRUCT:

	  /* Structs smaller or equal than 4 bytes are passed in one
	     register. Structs smaller or equal 8 bytes are passed in two
	     registers. Larger structures are passed by pointer.  */

	  len = (*p_arg)->size;
	  if (len <= 4)
	    {
	      /* Right-justify the bytes within the single word.  */
	      dest_cpy = (char *)(stack - slot) + 4 - len;
	      memcpy(dest_cpy, (char *)*p_argv, len);
	    }
	  else if (len <= 8)
	    {
	      /* Same alignment rule as a 64-bit scalar, with the bytes
		 right-justified within the doubleword.  */
	      slot += (slot & 1) ? 1 : 2;
	      dest_cpy = (char *)(stack - slot) + 8 - len;
	      memcpy(dest_cpy, (char *)*p_argv, len);
	    }
	  else
	    /* Too large for registers: store the address of the data.  */
	    *(UINT32 *)(stack - slot) = (UINT32)(*p_argv);
	  break;

	default:
	  FFI_ASSERT(0);
	}

      slot++;
      p_arg++;
      p_argv++;
    }

  /* Make sure we didn't mess up and scribble on the stack.
     The dump below only prints at debug level 5 or higher.  */
  {
    unsigned int n;

    debug(5, "Stack setup:\n");
    for (n = 0; n < (bytes + 3) / 4; n++)
      {
	if ((n%4) == 0) { debug(5, "\n%08x: ", (unsigned int)(stack - n)); }
	debug(5, "%08x ", *(stack - n));
      }
    debug(5, "\n");
  }

  /* Checked after the fact: by this point every argument word has
     already been written.  */
  FFI_ASSERT(slot * 4 <= bytes);

  return;
}
275
276static void ffi_size_stack_pa32(ffi_cif *cif)
277{
278  ffi_type **ptr;
279  int i;
280  int z = 0; /* # stack slots */
281
282  for (ptr = cif->arg_types, i = 0; i < cif->nargs; ptr++, i++)
283    {
284      int type = (*ptr)->type;
285
286      switch (type)
287	{
288	case FFI_TYPE_DOUBLE:
289	case FFI_TYPE_UINT64:
290	case FFI_TYPE_SINT64:
291	  z += 2 + (z & 1); /* must start on even regs, so we may waste one */
292	  break;
293
294#ifdef PA_HPUX
295	case FFI_TYPE_LONGDOUBLE:
296#endif
297	case FFI_TYPE_STRUCT:
298	  z += 1; /* pass by ptr, callee will copy */
299	  break;
300
301	default: /* <= 32-bit values */
302	  z++;
303	}
304    }
305
306  /* We can fit up to 6 args in the default 64-byte stack frame,
307     if we need more, we need more stack.  */
308  if (z <= 6)
309    cif->bytes = MIN_STACK_SIZE; /* min stack size */
310  else
311    cif->bytes = 64 + ROUND_UP((z - 6) * sizeof(UINT32), MIN_STACK_SIZE);
312
313  debug(3, "Calculated stack size is %u bytes\n", cif->bytes);
314}
315
316/* Perform machine dependent cif processing.  */
317ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
318{
319  /* Set the return type flag */
320  switch (cif->rtype->type)
321    {
322    case FFI_TYPE_VOID:
323    case FFI_TYPE_FLOAT:
324    case FFI_TYPE_DOUBLE:
325      cif->flags = (unsigned) cif->rtype->type;
326      break;
327
328#ifdef PA_HPUX
329    case FFI_TYPE_LONGDOUBLE:
330      /* Long doubles are treated like a structure.  */
331      cif->flags = FFI_TYPE_STRUCT;
332      break;
333#endif
334
335    case FFI_TYPE_STRUCT:
336      /* For the return type we have to check the size of the structures.
337	 If the size is smaller or equal 4 bytes, the result is given back
338	 in one register. If the size is smaller or equal 8 bytes than we
339	 return the result in two registers. But if the size is bigger than
340	 8 bytes, we work with pointers.  */
341      cif->flags = ffi_struct_type(cif->rtype);
342      break;
343
344    case FFI_TYPE_UINT64:
345    case FFI_TYPE_SINT64:
346      cif->flags = FFI_TYPE_UINT64;
347      break;
348
349    default:
350      cif->flags = FFI_TYPE_INT;
351      break;
352    }
353
354  /* Lucky us, because of the unique PA ABI we get to do our
355     own stack sizing.  */
356  switch (cif->abi)
357    {
358    case FFI_PA32:
359      ffi_size_stack_pa32(cif);
360      break;
361
362    default:
363      FFI_ASSERT(0);
364      break;
365    }
366
367  return FFI_OK;
368}
369
370extern void ffi_call_pa32(void (*)(UINT32 *, extended_cif *, unsigned),
371			  extended_cif *, unsigned, unsigned, unsigned *,
372			  void (*fn)(void));
373
374void ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
375{
376  extended_cif ecif;
377
378  ecif.cif = cif;
379  ecif.avalue = avalue;
380
381  /* If the return value is a struct and we don't have a return
382     value address then we need to make one.  */
383
384  if (rvalue == NULL
385#ifdef PA_HPUX
386      && (cif->rtype->type == FFI_TYPE_STRUCT
387	  || cif->rtype->type == FFI_TYPE_LONGDOUBLE))
388#else
389      && cif->rtype->type == FFI_TYPE_STRUCT)
390#endif
391    {
392      ecif.rvalue = alloca(cif->rtype->size);
393    }
394  else
395    ecif.rvalue = rvalue;
396
397
398  switch (cif->abi)
399    {
400    case FFI_PA32:
401      debug(3, "Calling ffi_call_pa32: ecif=%p, bytes=%u, flags=%u, rvalue=%p, fn=%p\n", &ecif, cif->bytes, cif->flags, ecif.rvalue, (void *)fn);
402      ffi_call_pa32(ffi_prep_args_pa32, &ecif, cif->bytes,
403		     cif->flags, ecif.rvalue, fn);
404      break;
405
406    default:
407      FFI_ASSERT(0);
408      break;
409    }
410}
411
412#if FFI_CLOSURES
413/* This is more-or-less an inverse of ffi_call -- we have arguments on
414   the stack, and we need to fill them into a cif structure and invoke
415   the user function. This really ought to be in asm to make sure
416   the compiler doesn't do things we don't expect.  */
417ffi_status ffi_closure_inner_pa32(ffi_closure *closure, UINT32 *stack)
418{
419  ffi_cif *cif;
420  void **avalue;
421  void *rvalue;
422  UINT32 ret[2]; /* function can return up to 64-bits in registers */
423  ffi_type **p_arg;
424  char *tmp;
425  int i, avn;
426  unsigned int slot = FIRST_ARG_SLOT;
427  register UINT32 r28 asm("r28");
428
429  cif = closure->cif;
430
431  /* If returning via structure, callee will write to our pointer.  */
432  if (cif->flags == FFI_TYPE_STRUCT)
433    rvalue = (void *)r28;
434  else
435    rvalue = &ret[0];
436
437  avalue = (void **)alloca(cif->nargs * FFI_SIZEOF_ARG);
438  avn = cif->nargs;
439  p_arg = cif->arg_types;
440
441  for (i = 0; i < avn; i++)
442    {
443      int type = (*p_arg)->type;
444
445      switch (type)
446	{
447	case FFI_TYPE_SINT8:
448	case FFI_TYPE_UINT8:
449	case FFI_TYPE_SINT16:
450	case FFI_TYPE_UINT16:
451	case FFI_TYPE_SINT32:
452	case FFI_TYPE_UINT32:
453	case FFI_TYPE_POINTER:
454	  avalue[i] = (char *)(stack - slot) + sizeof(UINT32) - (*p_arg)->size;
455	  break;
456
457	case FFI_TYPE_SINT64:
458	case FFI_TYPE_UINT64:
459	  slot += (slot & 1) ? 1 : 2;
460	  avalue[i] = (void *)(stack - slot);
461	  break;
462
463	case FFI_TYPE_FLOAT:
464#ifdef PA_LINUX
465	  /* The closure call is indirect.  In Linux, floating point
466	     arguments in indirect calls with a prototype are passed
467	     in the floating point registers instead of the general
468	     registers.  So, we need to replace what was previously
469	     stored in the current slot with the value in the
470	     corresponding floating point register.  */
471	  switch (slot - FIRST_ARG_SLOT)
472	    {
473	    case 0: fstw(fr4, (void *)(stack - slot)); break;
474	    case 1: fstw(fr5, (void *)(stack - slot)); break;
475	    case 2: fstw(fr6, (void *)(stack - slot)); break;
476	    case 3: fstw(fr7, (void *)(stack - slot)); break;
477	    }
478#endif
479	  avalue[i] = (void *)(stack - slot);
480	  break;
481
482	case FFI_TYPE_DOUBLE:
483	  slot += (slot & 1) ? 1 : 2;
484#ifdef PA_LINUX
485	  /* See previous comment for FFI_TYPE_FLOAT.  */
486	  switch (slot - FIRST_ARG_SLOT)
487	    {
488	    case 1: fstd(fr5, (void *)(stack - slot)); break;
489	    case 3: fstd(fr7, (void *)(stack - slot)); break;
490	    }
491#endif
492	  avalue[i] = (void *)(stack - slot);
493	  break;
494
495	case FFI_TYPE_STRUCT:
496	  /* Structs smaller or equal than 4 bytes are passed in one
497	     register. Structs smaller or equal 8 bytes are passed in two
498	     registers. Larger structures are passed by pointer.  */
499	  if((*p_arg)->size <= 4)
500	    {
501	      avalue[i] = (void *)(stack - slot) + sizeof(UINT32) -
502		(*p_arg)->size;
503	    }
504	  else if ((*p_arg)->size <= 8)
505	    {
506	      slot += (slot & 1) ? 1 : 2;
507	      avalue[i] = (void *)(stack - slot) + sizeof(UINT64) -
508		(*p_arg)->size;
509	    }
510	  else
511	    avalue[i] = (void *) *(stack - slot);
512	  break;
513
514	default:
515	  FFI_ASSERT(0);
516	}
517
518      slot++;
519      p_arg++;
520    }
521
522  /* Invoke the closure.  */
523  (closure->fun) (cif, rvalue, avalue, closure->user_data);
524
525  debug(3, "after calling function, ret[0] = %08x, ret[1] = %08x\n", ret[0],
526	ret[1]);
527
528  /* Store the result using the lower 2 bytes of the flags.  */
529  switch (cif->flags)
530    {
531    case FFI_TYPE_UINT8:
532      *(stack - FIRST_ARG_SLOT) = (UINT8)(ret[0] >> 24);
533      break;
534    case FFI_TYPE_SINT8:
535      *(stack - FIRST_ARG_SLOT) = (SINT8)(ret[0] >> 24);
536      break;
537    case FFI_TYPE_UINT16:
538      *(stack - FIRST_ARG_SLOT) = (UINT16)(ret[0] >> 16);
539      break;
540    case FFI_TYPE_SINT16:
541      *(stack - FIRST_ARG_SLOT) = (SINT16)(ret[0] >> 16);
542      break;
543    case FFI_TYPE_INT:
544    case FFI_TYPE_SINT32:
545    case FFI_TYPE_UINT32:
546      *(stack - FIRST_ARG_SLOT) = ret[0];
547      break;
548    case FFI_TYPE_SINT64:
549    case FFI_TYPE_UINT64:
550      *(stack - FIRST_ARG_SLOT) = ret[0];
551      *(stack - FIRST_ARG_SLOT - 1) = ret[1];
552      break;
553
554    case FFI_TYPE_DOUBLE:
555      fldd(rvalue, fr4);
556      break;
557
558    case FFI_TYPE_FLOAT:
559      fldw(rvalue, fr4);
560      break;
561
562    case FFI_TYPE_STRUCT:
563      /* Don't need a return value, done by caller.  */
564      break;
565
566    case FFI_TYPE_SMALL_STRUCT2:
567    case FFI_TYPE_SMALL_STRUCT3:
568    case FFI_TYPE_SMALL_STRUCT4:
569      tmp = (void*)(stack -  FIRST_ARG_SLOT);
570      tmp += 4 - cif->rtype->size;
571      memcpy((void*)tmp, &ret[0], cif->rtype->size);
572      break;
573
574    case FFI_TYPE_SMALL_STRUCT5:
575    case FFI_TYPE_SMALL_STRUCT6:
576    case FFI_TYPE_SMALL_STRUCT7:
577    case FFI_TYPE_SMALL_STRUCT8:
578      {
579	unsigned int ret2[2];
580	int off;
581
582	/* Right justify ret[0] and ret[1] */
583	switch (cif->flags)
584	  {
585	    case FFI_TYPE_SMALL_STRUCT5: off = 3; break;
586	    case FFI_TYPE_SMALL_STRUCT6: off = 2; break;
587	    case FFI_TYPE_SMALL_STRUCT7: off = 1; break;
588	    default: off = 0; break;
589	  }
590
591	memset (ret2, 0, sizeof (ret2));
592	memcpy ((char *)ret2 + off, ret, 8 - off);
593
594	*(stack - FIRST_ARG_SLOT) = ret2[0];
595	*(stack - FIRST_ARG_SLOT - 1) = ret2[1];
596      }
597      break;
598
599    case FFI_TYPE_POINTER:
600    case FFI_TYPE_VOID:
601      break;
602
603    default:
604      debug(0, "assert with cif->flags: %d\n",cif->flags);
605      FFI_ASSERT(0);
606      break;
607    }
608  return FFI_OK;
609}
610
611/* Fill in a closure to refer to the specified fun and user_data.
612   cif specifies the argument and result types for fun.
613   The cif must already be prep'ed.  */
614
615extern void ffi_closure_pa32(void);
616
617ffi_status
618ffi_prep_closure_loc (ffi_closure* closure,
619		      ffi_cif* cif,
620		      void (*fun)(ffi_cif*,void*,void**,void*),
621		      void *user_data,
622		      void *codeloc)
623{
624  UINT32 *tramp = (UINT32 *)(closure->tramp);
625#ifdef PA_HPUX
626  UINT32 *tmp;
627#endif
628
629  FFI_ASSERT (cif->abi == FFI_PA32);
630
631  /* Make a small trampoline that will branch to our
632     handler function. Use PC-relative addressing.  */
633
634#ifdef PA_LINUX
635  tramp[0] = 0xeaa00000; /* b,l .+8,%r21        ; %r21 <- pc+8 */
636  tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21    ; mask priv bits */
637  tramp[2] = 0x4aa10028; /* ldw 20(%r21),%r1    ; load plabel */
638  tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21   ; get closure addr */
639  tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22     ; address of handler */
640  tramp[5] = 0xeac0c000; /* bv%r0(%r22)         ; branch to handler */
641  tramp[6] = 0x0c281093; /* ldw 4(%r1),%r19     ; GP of handler */
642  tramp[7] = ((UINT32)(ffi_closure_pa32) & ~2);
643
644  /* Flush d/icache -- have to flush up 2 two lines because of
645     alignment.  */
646  __asm__ volatile(
647		   "fdc 0(%0)\n\t"
648		   "fdc %1(%0)\n\t"
649		   "fic 0(%%sr4, %0)\n\t"
650		   "fic %1(%%sr4, %0)\n\t"
651		   "sync\n\t"
652		   "nop\n\t"
653		   "nop\n\t"
654		   "nop\n\t"
655		   "nop\n\t"
656		   "nop\n\t"
657		   "nop\n\t"
658		   "nop\n"
659		   :
660		   : "r"((unsigned long)tramp & ~31),
661		     "r"(32 /* stride */)
662		   : "memory");
663#endif
664
665#ifdef PA_HPUX
666  tramp[0] = 0xeaa00000; /* b,l .+8,%r21        ; %r21 <- pc+8  */
667  tramp[1] = 0xd6a01c1e; /* depi 0,31,2,%r21    ; mask priv bits  */
668  tramp[2] = 0x4aa10038; /* ldw 28(%r21),%r1    ; load plabel  */
669  tramp[3] = 0x36b53ff1; /* ldo -8(%r21),%r21   ; get closure addr  */
670  tramp[4] = 0x0c201096; /* ldw 0(%r1),%r22     ; address of handler  */
671  tramp[5] = 0x02c010b4; /* ldsid (%r22),%r20   ; load space id  */
672  tramp[6] = 0x00141820; /* mtsp %r20,%sr0      ; into %sr0  */
673  tramp[7] = 0xe2c00000; /* be 0(%sr0,%r22)     ; branch to handler  */
674  tramp[8] = 0x0c281093; /* ldw 4(%r1),%r19     ; GP of handler  */
675  tramp[9] = ((UINT32)(ffi_closure_pa32) & ~2);
676
677  /* Flush d/icache -- have to flush three lines because of alignment.  */
678  __asm__ volatile(
679		   "copy %1,%0\n\t"
680		   "fdc,m %2(%0)\n\t"
681		   "fdc,m %2(%0)\n\t"
682		   "fdc,m %2(%0)\n\t"
683		   "ldsid (%1),%0\n\t"
684		   "mtsp %0,%%sr0\n\t"
685		   "copy %1,%0\n\t"
686		   "fic,m %2(%%sr0,%0)\n\t"
687		   "fic,m %2(%%sr0,%0)\n\t"
688		   "fic,m %2(%%sr0,%0)\n\t"
689		   "sync\n\t"
690		   "nop\n\t"
691		   "nop\n\t"
692		   "nop\n\t"
693		   "nop\n\t"
694		   "nop\n\t"
695		   "nop\n\t"
696		   "nop\n"
697		   : "=&r" ((unsigned long)tmp)
698		   : "r" ((unsigned long)tramp & ~31),
699		     "r" (32/* stride */)
700		   : "memory");
701#endif
702
703  closure->cif  = cif;
704  closure->user_data = user_data;
705  closure->fun  = fun;
706
707  return FFI_OK;
708}
709#endif
710