1/* -----------------------------------------------------------------------
2   ffi.c - Copyright (c) 1998, 2007, 2008 Red Hat, Inc.
3	   Copyright (c) 2000 Hewlett Packard Company
4
5   IA64 Foreign Function Interface
6
7   Permission is hereby granted, free of charge, to any person obtaining
8   a copy of this software and associated documentation files (the
9   ``Software''), to deal in the Software without restriction, including
10   without limitation the rights to use, copy, modify, merge, publish,
11   distribute, sublicense, and/or sell copies of the Software, and to
12   permit persons to whom the Software is furnished to do so, subject to
13   the following conditions:
14
15   The above copyright notice and this permission notice shall be included
16   in all copies or substantial portions of the Software.
17
18   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
19   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25   DEALINGS IN THE SOFTWARE.
26   ----------------------------------------------------------------------- */
27
28#include <ffi.h>
29#include <ffi_common.h>
30
31#include <stdlib.h>
32#include <stdbool.h>
33#include <float.h>
34
35#include "ia64_flags.h"
36
/* A 64-bit pointer value.  In LP64 mode, this is effectively a plain
   pointer.  In ILP32 mode, it's a pointer that's been extended to
   64 bits by "addp4".  Within this file it is used when converting a
   pointer to a UINT64 register image, and as the type of the return
   value pointer handed to ffi_call_unix.  */
typedef void *PTR64 __attribute__((mode(DI)));
41
/* Memory image of fp register contents.  This is the implementation
   specific format used by ldf.fill/stf.spill.  All we care about is
   that it wants a 16 byte aligned slot.  The image is written with the
   stf_spill macro and read back with the ldf_fill macro; its contents
   are otherwise treated as opaque.  */
typedef struct
{
  /* Two 64-bit words, i.e. one 16 byte slot, aligned on 16 bytes.  */
  UINT64 x[2] __attribute__((aligned(16)));
} fpreg;
49
50
/* The stack layout given to ffi_call_unix and ffi_closure_unix_inner.
   ffi_call fills one of these in before the call; ffi_closure_unix_inner
   reads one to recover the incoming arguments.  Both index gp_regs with
   a running slot counter that is not capped at 8, so slots past the
   eighth land in other_args, which must therefore follow gp_regs
   directly.  */

struct ia64_args
{
  fpreg fp_regs[8];	/* Contents of 8 fp arg registers.  */
  UINT64 gp_regs[8];	/* Contents of 8 gp arg registers.  */
  UINT64 other_args[];	/* Arguments passed on stack, variable size.  */
};
59
60
/* Given ADDR, the start of an 8 byte argument slot, return the address
   of the LEN least significant bytes of that slot.  On big-endian
   targets those are the final LEN bytes of the slot; otherwise they
   start at ADDR itself and ADDR is returned unchanged.  */

static inline void *
endian_adjust (void *addr, size_t len)
{
#ifdef __BIG_ENDIAN__
  char *slot = addr;

  return slot + (8 - len);
#else
  return addr;
#endif
}
72
/* Store VALUE to ADDR in the current cpu implementation's fp spill format.
   This is a macro instead of a function, so that it works for all 3 floating
   point types without type conversions.  Type conversion to long double breaks
   the denorm support.

   ADDR is a pointer to the destination slot; it is dereferenced to form
   the memory output operand.  The expansion is a single statement with no
   trailing semicolon, so the caller supplies the ';' and the macro can be
   used as the body of an if/else without braces.  */

#define stf_spill(addr, value)	\
  __asm__ ("stf.spill %0 = %1%P0" : "=m" (*(addr)) : "f"(value))
80
/* Load into RESULT the value stored at ADDR, which is in the current cpu
   implementation's fp spill format.  Like stf_spill, this must be a macro
   so that RESULT may be a float, double or __float80 lvalue without any
   type conversion.

   ADDR is a pointer to the source slot; it is dereferenced to form the
   memory input operand.  The expansion is a single statement with no
   trailing semicolon, so the caller supplies the ';'.  */

#define ldf_fill(result, addr)	\
  __asm__ ("ldf.fill %0 = %1%P1" : "=f"(result) : "m"(*(addr)))
86
87/* Return the size of the C type associated with with TYPE.  Which will
88   be one of the FFI_IA64_TYPE_HFA_* values.  */
89
90static size_t
91hfa_type_size (int type)
92{
93  switch (type)
94    {
95    case FFI_IA64_TYPE_HFA_FLOAT:
96      return sizeof(float);
97    case FFI_IA64_TYPE_HFA_DOUBLE:
98      return sizeof(double);
99    case FFI_IA64_TYPE_HFA_LDOUBLE:
100      return sizeof(__float80);
101    default:
102      abort ();
103    }
104}
105
106/* Load from ADDR a value indicated by TYPE.  Which will be one of
107   the FFI_IA64_TYPE_HFA_* values.  */
108
109static void
110hfa_type_load (fpreg *fpaddr, int type, void *addr)
111{
112  switch (type)
113    {
114    case FFI_IA64_TYPE_HFA_FLOAT:
115      stf_spill (fpaddr, *(float *) addr);
116      return;
117    case FFI_IA64_TYPE_HFA_DOUBLE:
118      stf_spill (fpaddr, *(double *) addr);
119      return;
120    case FFI_IA64_TYPE_HFA_LDOUBLE:
121      stf_spill (fpaddr, *(__float80 *) addr);
122      return;
123    default:
124      abort ();
125    }
126}
127
128/* Load VALUE into ADDR as indicated by TYPE.  Which will be one of
129   the FFI_IA64_TYPE_HFA_* values.  */
130
131static void
132hfa_type_store (int type, void *addr, fpreg *fpaddr)
133{
134  switch (type)
135    {
136    case FFI_IA64_TYPE_HFA_FLOAT:
137      {
138	float result;
139	ldf_fill (result, fpaddr);
140	*(float *) addr = result;
141	break;
142      }
143    case FFI_IA64_TYPE_HFA_DOUBLE:
144      {
145	double result;
146	ldf_fill (result, fpaddr);
147	*(double *) addr = result;
148	break;
149      }
150    case FFI_IA64_TYPE_HFA_LDOUBLE:
151      {
152	__float80 result;
153	ldf_fill (result, fpaddr);
154	*(__float80 *) addr = result;
155	break;
156      }
157    default:
158      abort ();
159    }
160}
161
162/* Is TYPE a struct containing floats, doubles, or extended doubles,
163   all of the same fp type?  If so, return the element type.  Return
164   FFI_TYPE_VOID if not.  */
165
166static int
167hfa_element_type (ffi_type *type, int nested)
168{
169  int element = FFI_TYPE_VOID;
170
171  switch (type->type)
172    {
173    case FFI_TYPE_FLOAT:
174      /* We want to return VOID for raw floating-point types, but the
175	 synthetic HFA type if we're nested within an aggregate.  */
176      if (nested)
177	element = FFI_IA64_TYPE_HFA_FLOAT;
178      break;
179
180    case FFI_TYPE_DOUBLE:
181      /* Similarly.  */
182      if (nested)
183	element = FFI_IA64_TYPE_HFA_DOUBLE;
184      break;
185
186    case FFI_TYPE_LONGDOUBLE:
187      /* Similarly, except that that HFA is true for double extended,
188	 but not quad precision.  Both have sizeof == 16, so tell the
189	 difference based on the precision.  */
190      if (LDBL_MANT_DIG == 64 && nested)
191	element = FFI_IA64_TYPE_HFA_LDOUBLE;
192      break;
193
194    case FFI_TYPE_STRUCT:
195      {
196	ffi_type **ptr = &type->elements[0];
197
198	for (ptr = &type->elements[0]; *ptr ; ptr++)
199	  {
200	    int sub_element = hfa_element_type (*ptr, 1);
201	    if (sub_element == FFI_TYPE_VOID)
202	      return FFI_TYPE_VOID;
203
204	    if (element == FFI_TYPE_VOID)
205	      element = sub_element;
206	    else if (element != sub_element)
207	      return FFI_TYPE_VOID;
208	  }
209      }
210      break;
211
212    default:
213      return FFI_TYPE_VOID;
214    }
215
216  return element;
217}
218
219
220/* Perform machine dependent cif processing. */
221
222ffi_status
223ffi_prep_cif_machdep(ffi_cif *cif)
224{
225  int flags;
226
227  /* Adjust cif->bytes to include space for the bits of the ia64_args frame
228     that preceeds the integer register portion.  The estimate that the
229     generic bits did for the argument space required is good enough for the
230     integer component.  */
231  cif->bytes += offsetof(struct ia64_args, gp_regs[0]);
232  if (cif->bytes < sizeof(struct ia64_args))
233    cif->bytes = sizeof(struct ia64_args);
234
235  /* Set the return type flag. */
236  flags = cif->rtype->type;
237  switch (cif->rtype->type)
238    {
239    case FFI_TYPE_LONGDOUBLE:
240      /* Leave FFI_TYPE_LONGDOUBLE as meaning double extended precision,
241	 and encode quad precision as a two-word integer structure.  */
242      if (LDBL_MANT_DIG != 64)
243	flags = FFI_IA64_TYPE_SMALL_STRUCT | (16 << 8);
244      break;
245
246    case FFI_TYPE_STRUCT:
247      {
248        size_t size = cif->rtype->size;
249  	int hfa_type = hfa_element_type (cif->rtype, 0);
250
251	if (hfa_type != FFI_TYPE_VOID)
252	  {
253	    size_t nelts = size / hfa_type_size (hfa_type);
254	    if (nelts <= 8)
255	      flags = hfa_type | (size << 8);
256	  }
257	else
258	  {
259	    if (size <= 32)
260	      flags = FFI_IA64_TYPE_SMALL_STRUCT | (size << 8);
261	  }
262      }
263      break;
264
265    default:
266      break;
267    }
268  cif->flags = flags;
269
270  return FFI_OK;
271}
272
273extern int ffi_call_unix (struct ia64_args *, PTR64, void (*)(void), UINT64);
274
275void
276ffi_call(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
277{
278  struct ia64_args *stack;
279  long i, avn, gpcount, fpcount;
280  ffi_type **p_arg;
281
282  FFI_ASSERT (cif->abi == FFI_UNIX);
283
284  /* If we have no spot for a return value, make one.  */
285  if (rvalue == NULL && cif->rtype->type != FFI_TYPE_VOID)
286    rvalue = alloca (cif->rtype->size);
287
288  /* Allocate the stack frame.  */
289  stack = alloca (cif->bytes);
290
291  gpcount = fpcount = 0;
292  avn = cif->nargs;
293  for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++)
294    {
295      switch ((*p_arg)->type)
296	{
297	case FFI_TYPE_SINT8:
298	  stack->gp_regs[gpcount++] = *(SINT8 *)avalue[i];
299	  break;
300	case FFI_TYPE_UINT8:
301	  stack->gp_regs[gpcount++] = *(UINT8 *)avalue[i];
302	  break;
303	case FFI_TYPE_SINT16:
304	  stack->gp_regs[gpcount++] = *(SINT16 *)avalue[i];
305	  break;
306	case FFI_TYPE_UINT16:
307	  stack->gp_regs[gpcount++] = *(UINT16 *)avalue[i];
308	  break;
309	case FFI_TYPE_SINT32:
310	  stack->gp_regs[gpcount++] = *(SINT32 *)avalue[i];
311	  break;
312	case FFI_TYPE_UINT32:
313	  stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i];
314	  break;
315	case FFI_TYPE_SINT64:
316	case FFI_TYPE_UINT64:
317	  stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i];
318	  break;
319
320	case FFI_TYPE_POINTER:
321	  stack->gp_regs[gpcount++] = (UINT64)(PTR64) *(void **)avalue[i];
322	  break;
323
324	case FFI_TYPE_FLOAT:
325	  if (gpcount < 8 && fpcount < 8)
326	    stf_spill (&stack->fp_regs[fpcount++], *(float *)avalue[i]);
327	  stack->gp_regs[gpcount++] = *(UINT32 *)avalue[i];
328	  break;
329
330	case FFI_TYPE_DOUBLE:
331	  if (gpcount < 8 && fpcount < 8)
332	    stf_spill (&stack->fp_regs[fpcount++], *(double *)avalue[i]);
333	  stack->gp_regs[gpcount++] = *(UINT64 *)avalue[i];
334	  break;
335
336	case FFI_TYPE_LONGDOUBLE:
337	  if (gpcount & 1)
338	    gpcount++;
339	  if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8)
340	    stf_spill (&stack->fp_regs[fpcount++], *(__float80 *)avalue[i]);
341	  memcpy (&stack->gp_regs[gpcount], avalue[i], 16);
342	  gpcount += 2;
343	  break;
344
345	case FFI_TYPE_STRUCT:
346	  {
347	    size_t size = (*p_arg)->size;
348	    size_t align = (*p_arg)->alignment;
349	    int hfa_type = hfa_element_type (*p_arg, 0);
350
351	    FFI_ASSERT (align <= 16);
352	    if (align == 16 && (gpcount & 1))
353	      gpcount++;
354
355	    if (hfa_type != FFI_TYPE_VOID)
356	      {
357		size_t hfa_size = hfa_type_size (hfa_type);
358		size_t offset = 0;
359		size_t gp_offset = gpcount * 8;
360
361		while (fpcount < 8
362		       && offset < size
363		       && gp_offset < 8 * 8)
364		  {
365		    hfa_type_load (&stack->fp_regs[fpcount], hfa_type,
366				   avalue[i] + offset);
367		    offset += hfa_size;
368		    gp_offset += hfa_size;
369		    fpcount += 1;
370		  }
371	      }
372
373	    memcpy (&stack->gp_regs[gpcount], avalue[i], size);
374	    gpcount += (size + 7) / 8;
375	  }
376	  break;
377
378	default:
379	  abort ();
380	}
381    }
382
383  ffi_call_unix (stack, rvalue, fn, cif->flags);
384}
385
386/* Closures represent a pair consisting of a function pointer, and
387   some user data.  A closure is invoked by reinterpreting the closure
388   as a function pointer, and branching to it.  Thus we can make an
389   interpreted function callable as a C function: We turn the
390   interpreter itself, together with a pointer specifying the
391   interpreted procedure, into a closure.
392
   For IA64, function pointers are already pairs consisting of a code
   pointer, and a gp pointer.  The latter is needed to access global
   variables.  Here we set up such a pair as the first two words of
   the closure (in the "trampoline" area), but we replace the gp
   pointer with a pointer to the closure itself.  We also add the real
   gp pointer to the closure.  This allows the function entry code to
   both retrieve the user data, and to restore the correct gp pointer.  */
400
401extern void ffi_closure_unix ();
402
403ffi_status
404ffi_prep_closure_loc (ffi_closure* closure,
405		      ffi_cif* cif,
406		      void (*fun)(ffi_cif*,void*,void**,void*),
407		      void *user_data,
408		      void *codeloc)
409{
410  /* The layout of a function descriptor.  A C function pointer really
411     points to one of these.  */
412  struct ia64_fd
413  {
414    UINT64 code_pointer;
415    UINT64 gp;
416  };
417
418  struct ffi_ia64_trampoline_struct
419  {
420    UINT64 code_pointer;	/* Pointer to ffi_closure_unix.  */
421    UINT64 fake_gp;		/* Pointer to closure, installed as gp.  */
422    UINT64 real_gp;		/* Real gp value.  */
423  };
424
425  struct ffi_ia64_trampoline_struct *tramp;
426  struct ia64_fd *fd;
427
428  FFI_ASSERT (cif->abi == FFI_UNIX);
429
430  tramp = (struct ffi_ia64_trampoline_struct *)closure->tramp;
431  fd = (struct ia64_fd *)(void *)ffi_closure_unix;
432
433  tramp->code_pointer = fd->code_pointer;
434  tramp->real_gp = fd->gp;
435  tramp->fake_gp = (UINT64)(PTR64)codeloc;
436  closure->cif = cif;
437  closure->user_data = user_data;
438  closure->fun = fun;
439
440  return FFI_OK;
441}
442
443
/* Recover the arguments of a closure invocation from STACK, call the
   closure's handler, and return the cif's return-type flags.

   CLOSURE  the closure being invoked; supplies cif, fun and user_data.
   STACK    frame holding the incoming fp and gp argument values.
   RVALUE   where the handler should store its result, unless the cif's
	    flags equal FFI_TYPE_STRUCT, in which case R8 is used.
   R8       alternative result location used for FFI_TYPE_STRUCT.

   The caller is not visible in this file; presumably this is invoked
   from the ffi_closure_unix entry code.  An array of CIF->nargs argument
   pointers is built with alloca.  Each pointer refers either into STACK
   or, for homogeneous fp aggregates, to an alloca'd copy.  Note that the
   fp_regs slots used for scalar fp arguments are overwritten in place.
   The slot accounting mirrors that of ffi_call.  */
UINT64
ffi_closure_unix_inner (ffi_closure *closure, struct ia64_args *stack,
			void *rvalue, void *r8)
{
  ffi_cif *cif;
  void **avalue;
  ffi_type **p_arg;
  long i, avn, gpcount, fpcount;

  cif = closure->cif;
  avn = cif->nargs;
  avalue = alloca (avn * sizeof (void *));

  /* If the structure return value is passed in memory get that location
     from r8 so as to pass the value directly back to the caller.  */
  if (cif->flags == FFI_TYPE_STRUCT)
    rvalue = r8;

  /* gpcount counts 8 byte argument slots consumed so far, fpcount the
     fp register images consumed so far.  */
  gpcount = fpcount = 0;
  for (i = 0, p_arg = cif->arg_types; i < avn; i++, p_arg++)
    {
      switch ((*p_arg)->type)
	{
	/* Integers and pointers take one slot each; point at the low
	   bytes of the slot that hold the value.  */
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT8:
	  avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 1);
	  break;
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT16:
	  avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 2);
	  break;
	case FFI_TYPE_SINT32:
	case FFI_TYPE_UINT32:
	  avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], 4);
	  break;
	case FFI_TYPE_SINT64:
	case FFI_TYPE_UINT64:
	  avalue[i] = &stack->gp_regs[gpcount++];
	  break;
	case FFI_TYPE_POINTER:
	  avalue[i] = endian_adjust(&stack->gp_regs[gpcount++], sizeof(void*));
	  break;

	case FFI_TYPE_FLOAT:
	  if (gpcount < 8 && fpcount < 8)
	    {
	      /* Take the value from the next fp register image: decode
		 it from spill format and store it back as a plain float
		 at the start of the same slot, which the argument
		 pointer then refers to.  */
	      fpreg *addr = &stack->fp_regs[fpcount++];
	      float result;
	      avalue[i] = addr;
	      ldf_fill (result, addr);
	      *(float *)addr = result;
	    }
	  else
	    avalue[i] = endian_adjust(&stack->gp_regs[gpcount], 4);
	  /* A gp slot is consumed in either case.  */
	  gpcount++;
	  break;

	case FFI_TYPE_DOUBLE:
	  if (gpcount < 8 && fpcount < 8)
	    {
	      /* As for float: decode the spill image in place.  */
	      fpreg *addr = &stack->fp_regs[fpcount++];
	      double result;
	      avalue[i] = addr;
	      ldf_fill (result, addr);
	      *(double *)addr = result;
	    }
	  else
	    avalue[i] = &stack->gp_regs[gpcount];
	  gpcount++;
	  break;

	case FFI_TYPE_LONGDOUBLE:
	  /* Two slots, starting on an even slot.  */
	  if (gpcount & 1)
	    gpcount++;
	  /* Only the double extended format (64 bit mantissa) is taken
	     from an fp register image; otherwise the value is read from
	     the slot area.  */
	  if (LDBL_MANT_DIG == 64 && gpcount < 8 && fpcount < 8)
	    {
	      fpreg *addr = &stack->fp_regs[fpcount++];
	      __float80 result;
	      avalue[i] = addr;
	      ldf_fill (result, addr);
	      *(__float80 *)addr = result;
	    }
	  else
	    avalue[i] = &stack->gp_regs[gpcount];
	  gpcount += 2;
	  break;

	case FFI_TYPE_STRUCT:
	  {
	    size_t size = (*p_arg)->size;
	    size_t align = (*p_arg)->alignment;
	    int hfa_type = hfa_element_type (*p_arg, 0);

	    FFI_ASSERT (align <= 16);
	    /* 16 byte aligned structs start on an even slot.  */
	    if (align == 16 && (gpcount & 1))
	      gpcount++;

	    if (hfa_type != FFI_TYPE_VOID)
	      {
		/* Homogeneous fp aggregate: reassemble it in a fresh
		   buffer, since its elements may be split between fp
		   register images and the slot area.  */
		size_t hfa_size = hfa_type_size (hfa_type);
		size_t offset = 0;
		size_t gp_offset = gpcount * 8;
		void *addr = alloca (size);

		avalue[i] = addr;

		/* Take elements from fp register images while one is
		   available and the element still starts within the
		   first eight gp slots.  */
		while (fpcount < 8
		       && offset < size
		       && gp_offset < 8 * 8)
		  {
		    hfa_type_store (hfa_type, addr + offset,
				    &stack->fp_regs[fpcount]);
		    offset += hfa_size;
		    gp_offset += hfa_size;
		    fpcount += 1;
		  }

		/* Whatever did not come from fp register images is
		   copied from the corresponding bytes of the slot
		   area.  */
		if (offset < size)
		  memcpy (addr + offset, (char *)stack->gp_regs + gp_offset,
			  size - offset);
	      }
	    else
	      avalue[i] = &stack->gp_regs[gpcount];

	    /* The struct takes its size rounded up to whole slots.  */
	    gpcount += (size + 7) / 8;
	  }
	  break;

	default:
	  abort ();
	}
    }

  /* Hand the decoded arguments to the user's handler.  */
  closure->fun (cif, rvalue, avalue, closure->user_data);

  return cif->flags;
}
581