1/* -----------------------------------------------------------------------
2   unix.S - Copyright (c) 1998, 2008 Red Hat, Inc.
3            Copyright (c) 2000 Hewlett Packard Company
4
5   IA64/unix Foreign Function Interface
6
7   Primary author: Hans Boehm, HP Labs
8
9   Loosely modeled on Cygnus code for other platforms.
10
11   Permission is hereby granted, free of charge, to any person obtaining
12   a copy of this software and associated documentation files (the
13   ``Software''), to deal in the Software without restriction, including
14   without limitation the rights to use, copy, modify, merge, publish,
15   distribute, sublicense, and/or sell copies of the Software, and to
16   permit persons to whom the Software is furnished to do so, subject to
17   the following conditions:
18
19   The above copyright notice and this permission notice shall be included
20   in all copies or substantial portions of the Software.
21
22   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
23   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
27   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
29   DEALINGS IN THE SOFTWARE.
30   ----------------------------------------------------------------------- */
31
32#define LIBFFI_ASM
33#include <fficonfig.h>
34#include <ffi.h>
35#include "ia64_flags.h"
36
37	.pred.safe_across_calls p1-p5,p16-p63
38.text
39
/* int ffi_call_unix (struct ia64_args *stack, PTR64 rvalue,
		      void (*fn)(void), int flags);

   Register usage, as seen in the code below:
     in0 (stack)   argument block: eight 16-byte slots filled into f8-f15
		   (offsets 0-127) followed by eight 8-byte words loaded
		   into out0-out7 (offsets 128-191).
     in1 (rvalue)  address the return-value handlers store through; it is
		   also copied into r8 before the call.
     in2 (fn)	   points at two 8-byte words: the branch target, then the
		   gp value installed for the call.
     in3 (flags)   bits 0-7 index .Lst_table; flags >> 8 is the byte count
		   tested by the small-struct and HFA handlers.
     loc0, loc1 and loc2 keep the entry values of ar.pfs, b0 and gp.
 */

        .align 16
        .global	ffi_call_unix
        .proc	ffi_call_unix
ffi_call_unix:
	.prologue
	/* Bit o trickiness.  We actually share a stack frame with ffi_call.
	   Rely on the fact that ffi_call uses a vframe and don't bother
	   tracking one here at all.  */
	.fframe	0
	.save	ar.pfs, r36 // loc0
	alloc   loc0 = ar.pfs, 4, 3, 8, 0
	.save	rp, loc1
	mov 	loc1 = b0
	.body
	/* r16 walks the odd 16-byte slots while in0 walks the even ones.  */
	add	r16 = 16, in0
	mov	loc2 = gp
	/* NOTE(review): presumably r8 carries the result address for callees
	   that return an aggregate in memory; confirm against the ABI.  */
	mov	r8 = in1
	;;

	/* Load up all of the argument registers.  */
	ldf.fill f8 = [in0], 32
	ldf.fill f9 = [r16], 32
	;;
	ldf.fill f10 = [in0], 32
	ldf.fill f11 = [r16], 32
	;;
	ldf.fill f12 = [in0], 32
	ldf.fill f13 = [r16], 32
	;;
	ldf.fill f14 = [in0], 32
	/* Step r16 by 24 instead of 32: it lands on offset 136, the second
	   8-byte word, while in0 is at offset 128, the first.  */
	ldf.fill f15 = [r16], 24
	;;
	ld8	out0 = [in0], 16
	ld8	out1 = [r16], 16
	;;
	ld8	out2 = [in0], 16
	ld8	out3 = [r16], 16
	;;
	ld8	out4 = [in0], 16
	ld8	out5 = [r16], 16
	;;
	/* No post-increment here: in0 stays at offset 176 of the block.  */
	ld8	out6 = [in0]
	ld8	out7 = [r16]
	;;

	/* Deallocate the register save area from the stack frame.  */
	mov	sp = in0

	/* Call the target function.  */
	ld8	r16 = [in2], 8
	;;
	ld8	gp = [in2]
	mov	b6 = r16
	br.call.sptk.many b0 = b6
	;;

	/* Dispatch to handle return value.  Each .Lst_table slot holds an
	   offset that is added to the address of the slot itself to form
	   the handler address.  */
	mov	gp = loc2
	zxt1	r16 = in3
	;;
	mov	ar.pfs = loc0
	addl	r18 = @ltoffx(.Lst_table), gp
	;;
	ld8.mov	r18 = [r18], .Lst_table
	mov	b0 = loc1
	;;
	shladd	r18 = r16, 3, r18
	;;
	ld8	r17 = [r18]
	/* From here on in3 is the byte count, no longer the raw flags.  */
	shr	in3 = in3, 8
	;;
	add	r17 = r17, r18
	;;
	mov	b6 = r17
	br	b6
	;;

	/* Scalar handlers.  Every integer type narrower than 64 bits is
	   widened in r8 and then written as a full 8-byte word.  */
.Lst_void:
	br.ret.sptk.many b0
	;;
.Lst_uint8:
	zxt1	r8 = r8
	;;
	st8	[in1] = r8
	br.ret.sptk.many b0
	;;
.Lst_sint8:
	sxt1	r8 = r8
	;;
	st8	[in1] = r8
	br.ret.sptk.many b0
	;;
.Lst_uint16:
	zxt2	r8 = r8
	;;
	st8	[in1] = r8
	br.ret.sptk.many b0
	;;
.Lst_sint16:
	sxt2	r8 = r8
	;;
	st8	[in1] = r8
	br.ret.sptk.many b0
	;;
.Lst_uint32:
	zxt4	r8 = r8
	;;
	st8	[in1] = r8
	br.ret.sptk.many b0
	;;
.Lst_sint32:
	sxt4	r8 = r8
	;;
	st8	[in1] = r8
	br.ret.sptk.many b0
	;;
.Lst_int64:
	st8	[in1] = r8
	br.ret.sptk.many b0
	;;
.Lst_float:
	stfs	[in1] = f8
	br.ret.sptk.many b0
	;;
.Lst_double:
	stfd	[in1] = f8
	br.ret.sptk.many b0
	;;
.Lst_ldouble:
	stfe	[in1] = f8
	br.ret.sptk.many b0
	;;

.Lst_small_struct:
	/* The aggregate (at most 32 bytes) is in r8-r11 and in3 holds its
	   size in bytes.  Stage the registers in memory and let memcpy move
	   exactly in3 bytes to rvalue.

	   sp still equals the final in0, i.e. offset 176 of the argument
	   block.  Lower it by 48 so that, counted from the new sp:
	     [sp +  0, sp + 16)  stays free for the callee.  The Itanium
				 runtime convention lets a called function
				 (or e.g. a lazy-binding stub in front of
				 memcpy) overwrite the 16 bytes at its
				 incoming sp.
	     [sp + 16, sp + 48)  is the staging buffer.
	   Both ranges lie inside the out0-out5 slots of the argument block,
	   whose contents were consumed before the call.  Staging the data
	   at sp itself would expose the r8 and r9 words to that scratch
	   use.  sp is not restored here; see the note at the top of this
	   procedure about sharing the frame of ffi_call.  */
	add	sp = -48, sp
	cmp.lt	p6, p0 = 8, in3
	cmp.lt	p7, p0 = 16, in3
	cmp.lt	p8, p0 = 24, in3
	;;
	add	r19 = 16, sp
	add	r16 = 24, sp
	add	r17 = 32, sp
	add	r18 = 40, sp
	;;
	st8	[r19] = r8
(p6)	st8	[r16] = r9
	mov	out0 = in1
(p7)	st8	[r17] = r10
(p8)	st8	[r18] = r11
	mov	out1 = r19
	mov	out2 = in3
	br.call.sptk.many b0 = memcpy#
	;;
	/* The call replaced ar.pfs, b0 and possibly gp; put back the values
	   saved on entry before returning.  */
	mov	ar.pfs = loc0
	mov	b0 = loc1
	mov	gp = loc2
	br.ret.sptk.many b0

	/* HFA handlers.  in1 and r16 leapfrog over alternating elements;
	   element k (k >= 1) is stored only when in3 is greater than k times
	   the element size.  f8 is always stored.  */
.Lst_hfa_float:
	add	r16 = 4, in1
	cmp.lt	p6, p0 = 4, in3
	;;
	stfs	[in1] = f8, 8
(p6)	stfs	[r16] = f9, 8
	cmp.lt	p7, p0 = 8, in3
	cmp.lt	p8, p0 = 12, in3
	;;
(p7)	stfs	[in1] = f10, 8
(p8)	stfs	[r16] = f11, 8
	cmp.lt	p9, p0 = 16, in3
	cmp.lt	p10, p0 = 20, in3
	;;
(p9)	stfs	[in1] = f12, 8
(p10)	stfs	[r16] = f13, 8
	cmp.lt	p6, p0 = 24, in3
	cmp.lt	p7, p0 = 28, in3
	;;
(p6)	stfs	[in1] = f14
(p7)	stfs	[r16] = f15
	br.ret.sptk.many b0
	;;

.Lst_hfa_double:
	add	r16 = 8, in1
	cmp.lt	p6, p0 = 8, in3
	;;
	stfd	[in1] = f8, 16
(p6)	stfd	[r16] = f9, 16
	cmp.lt	p7, p0 = 16, in3
	cmp.lt	p8, p0 = 24, in3
	;;
(p7)	stfd	[in1] = f10, 16
(p8)	stfd	[r16] = f11, 16
	cmp.lt	p9, p0 = 32, in3
	cmp.lt	p10, p0 = 40, in3
	;;
(p9)	stfd	[in1] = f12, 16
(p10)	stfd	[r16] = f13, 16
	cmp.lt	p6, p0 = 48, in3
	cmp.lt	p7, p0 = 56, in3
	;;
(p6)	stfd	[in1] = f14
(p7)	stfd	[r16] = f15
	br.ret.sptk.many b0
	;;

.Lst_hfa_ldouble:
	add	r16 = 16, in1
	cmp.lt	p6, p0 = 16, in3
	;;
	stfe	[in1] = f8, 32
(p6)	stfe	[r16] = f9, 32
	cmp.lt	p7, p0 = 32, in3
	cmp.lt	p8, p0 = 48, in3
	;;
(p7)	stfe	[in1] = f10, 32
(p8)	stfe	[r16] = f11, 32
	cmp.lt	p9, p0 = 64, in3
	cmp.lt	p10, p0 = 80, in3
	;;
(p9)	stfe	[in1] = f12, 32
(p10)	stfe	[r16] = f13, 32
	cmp.lt	p6, p0 = 96, in3
	cmp.lt	p7, p0 = 112, in3
	;;
(p6)	stfe	[in1] = f14
(p7)	stfe	[r16] = f15
	br.ret.sptk.many b0
	;;

        .endp ffi_call_unix
274
275        .align 16
276        .global ffi_closure_unix
277        .proc ffi_closure_unix
278
279#define FRAME_SIZE	(8*16 + 8*8 + 8*16)
280
281ffi_closure_unix:
282	.prologue
283	.save	ar.pfs, r40 // loc0
284	alloc   loc0 = ar.pfs, 8, 4, 4, 0
285	.fframe	FRAME_SIZE
286	add	r12 = -FRAME_SIZE, r12
287	.save	rp, loc1
288	mov	loc1 = b0
289	.save	ar.unat, loc2
290	mov	loc2 = ar.unat
291	.body
292
293	/* Retrieve closure pointer and real gp.  */
294#ifdef _ILP32
295	addp4	out0 = 0, gp
296	addp4	gp = 16, gp
297#else
298	mov	out0 = gp
299	add	gp = 16, gp
300#endif
301	;;
302	ld8	gp = [gp]
303
304	/* Spill all of the possible argument registers.  */
305	add	r16 = 16 + 8*16, sp
306	add	r17 = 16 + 8*16 + 16, sp
307	;;
308	stf.spill [r16] = f8, 32
309	stf.spill [r17] = f9, 32
310	mov	loc3 = gp
311	;;
312	stf.spill [r16] = f10, 32
313	stf.spill [r17] = f11, 32
314	;;
315	stf.spill [r16] = f12, 32
316	stf.spill [r17] = f13, 32
317	;;
318	stf.spill [r16] = f14, 32
319	stf.spill [r17] = f15, 24
320	;;
321	.mem.offset 0, 0
322	st8.spill [r16] = in0, 16
323	.mem.offset 8, 0
324	st8.spill [r17] = in1, 16
325	add	out1 = 16 + 8*16, sp
326	;;
327	.mem.offset 0, 0
328	st8.spill [r16] = in2, 16
329	.mem.offset 8, 0
330	st8.spill [r17] = in3, 16
331	add	out2 = 16, sp
332	;;
333	.mem.offset 0, 0
334	st8.spill [r16] = in4, 16
335	.mem.offset 8, 0
336	st8.spill [r17] = in5, 16
337	mov	out3 = r8
338	;;
339	.mem.offset 0, 0
340	st8.spill [r16] = in6
341	.mem.offset 8, 0
342	st8.spill [r17] = in7
343
344	/* Invoke ffi_closure_unix_inner for the hard work.  */
345	br.call.sptk.many b0 = ffi_closure_unix_inner
346	;;
347
348	/* Dispatch to handle return value.  */
349	mov	gp = loc3
350	zxt1	r16 = r8
351	;;
352	addl	r18 = @ltoffx(.Lld_table), gp
353	mov	ar.pfs = loc0
354	;;
355	ld8.mov	r18 = [r18], .Lld_table
356	mov	b0 = loc1
357	;;
358	shladd	r18 = r16, 3, r18
359	mov	ar.unat = loc2
360	;;
361	ld8	r17 = [r18]
362	shr	r8 = r8, 8
363	;;
364	add	r17 = r17, r18
365	add	r16 = 16, sp
366	;;
367	mov	b6 = r17
368	br	b6
369	;;
370	.label_state 1
371
372.Lld_void:
373	.restore sp
374	add	sp = FRAME_SIZE, sp
375	br.ret.sptk.many b0
376	;;
377.Lld_int:
378	.body
379	.copy_state 1
380	ld8	r8 = [r16]
381	.restore sp
382	add	sp = FRAME_SIZE, sp
383	br.ret.sptk.many b0
384	;;
385.Lld_float:
386	.body
387	.copy_state 1
388	ldfs	f8 = [r16]
389	.restore sp
390	add	sp = FRAME_SIZE, sp
391	br.ret.sptk.many b0
392	;;
393.Lld_double:
394	.body
395	.copy_state 1
396	ldfd	f8 = [r16]
397	.restore sp
398	add	sp = FRAME_SIZE, sp
399	br.ret.sptk.many b0
400	;;
401.Lld_ldouble:
402	.body
403	.copy_state 1
404	ldfe	f8 = [r16]
405	.restore sp
406	add	sp = FRAME_SIZE, sp
407	br.ret.sptk.many b0
408	;;
409
410.Lld_small_struct:
411	.body
412	.copy_state 1
413	add	r17 = 8, r16
414	cmp.lt	p6, p0 = 8, r8
415	cmp.lt	p7, p0 = 16, r8
416	cmp.lt	p8, p0 = 24, r8
417	;;
418	ld8	r8 = [r16], 16
419(p6)	ld8	r9 = [r17], 16
420	;;
421(p7)	ld8	r10 = [r16]
422(p8)	ld8	r11 = [r17]
423	.restore sp
424	add	sp = FRAME_SIZE, sp
425	br.ret.sptk.many b0
426	;;
427
428.Lld_hfa_float:
429	.body
430	.copy_state 1
431	add	r17 = 4, r16
432	cmp.lt	p6, p0 = 4, r8
433	;;
434	ldfs	f8 = [r16], 8
435(p6)	ldfs	f9 = [r17], 8
436	cmp.lt	p7, p0 = 8, r8
437	cmp.lt	p8, p0 = 12, r8
438	;;
439(p7)	ldfs	f10 = [r16], 8
440(p8)	ldfs	f11 = [r17], 8
441	cmp.lt	p9, p0 = 16, r8
442	cmp.lt	p10, p0 = 20, r8
443	;;
444(p9)	ldfs	f12 = [r16], 8
445(p10)	ldfs	f13 = [r17], 8
446	cmp.lt	p6, p0 = 24, r8
447	cmp.lt	p7, p0 = 28, r8
448	;;
449(p6)	ldfs	f14 = [r16]
450(p7)	ldfs	f15 = [r17]
451	.restore sp
452	add	sp = FRAME_SIZE, sp
453	br.ret.sptk.many b0
454	;;
455
456.Lld_hfa_double:
457	.body
458	.copy_state 1
459	add	r17 = 8, r16
460	cmp.lt	p6, p0 = 8, r8
461	;;
462	ldfd	f8 = [r16], 16
463(p6)	ldfd	f9 = [r17], 16
464	cmp.lt	p7, p0 = 16, r8
465	cmp.lt	p8, p0 = 24, r8
466	;;
467(p7)	ldfd	f10 = [r16], 16
468(p8)	ldfd	f11 = [r17], 16
469	cmp.lt	p9, p0 = 32, r8
470	cmp.lt	p10, p0 = 40, r8
471	;;
472(p9)	ldfd	f12 = [r16], 16
473(p10)	ldfd	f13 = [r17], 16
474	cmp.lt	p6, p0 = 48, r8
475	cmp.lt	p7, p0 = 56, r8
476	;;
477(p6)	ldfd	f14 = [r16]
478(p7)	ldfd	f15 = [r17]
479	.restore sp
480	add	sp = FRAME_SIZE, sp
481	br.ret.sptk.many b0
482	;;
483
484.Lld_hfa_ldouble:
485	.body
486	.copy_state 1
487	add	r17 = 16, r16
488	cmp.lt	p6, p0 = 16, r8
489	;;
490	ldfe	f8 = [r16], 32
491(p6)	ldfe	f9 = [r17], 32
492	cmp.lt	p7, p0 = 32, r8
493	cmp.lt	p8, p0 = 48, r8
494	;;
495(p7)	ldfe	f10 = [r16], 32
496(p8)	ldfe	f11 = [r17], 32
497	cmp.lt	p9, p0 = 64, r8
498	cmp.lt	p10, p0 = 80, r8
499	;;
500(p9)	ldfe	f12 = [r16], 32
501(p10)	ldfe	f13 = [r17], 32
502	cmp.lt	p6, p0 = 96, r8
503	cmp.lt	p7, p0 = 112, r8
504	;;
505(p6)	ldfe	f14 = [r16]
506(p7)	ldfe	f15 = [r17]
507	.restore sp
508	add	sp = FRAME_SIZE, sp
509	br.ret.sptk.many b0
510	;;
511
512	.endp	ffi_closure_unix
513
514	.section .rodata
515	.align	8
516.Lst_table:
517	data8	@pcrel(.Lst_void)		// FFI_TYPE_VOID
518	data8	@pcrel(.Lst_sint32)		// FFI_TYPE_INT
519	data8	@pcrel(.Lst_float)		// FFI_TYPE_FLOAT
520	data8	@pcrel(.Lst_double)		// FFI_TYPE_DOUBLE
521	data8	@pcrel(.Lst_ldouble)		// FFI_TYPE_LONGDOUBLE
522	data8	@pcrel(.Lst_uint8)		// FFI_TYPE_UINT8
523	data8	@pcrel(.Lst_sint8)		// FFI_TYPE_SINT8
524	data8	@pcrel(.Lst_uint16)		// FFI_TYPE_UINT16
525	data8	@pcrel(.Lst_sint16)		// FFI_TYPE_SINT16
526	data8	@pcrel(.Lst_uint32)		// FFI_TYPE_UINT32
527	data8	@pcrel(.Lst_sint32)		// FFI_TYPE_SINT32
528	data8	@pcrel(.Lst_int64)		// FFI_TYPE_UINT64
529	data8	@pcrel(.Lst_int64)		// FFI_TYPE_SINT64
530	data8	@pcrel(.Lst_void)		// FFI_TYPE_STRUCT
531	data8	@pcrel(.Lst_int64)		// FFI_TYPE_POINTER
532	data8 	@pcrel(.Lst_small_struct)	// FFI_IA64_TYPE_SMALL_STRUCT
533	data8	@pcrel(.Lst_hfa_float)		// FFI_IA64_TYPE_HFA_FLOAT
534	data8	@pcrel(.Lst_hfa_double)		// FFI_IA64_TYPE_HFA_DOUBLE
535	data8	@pcrel(.Lst_hfa_ldouble)	// FFI_IA64_TYPE_HFA_LDOUBLE
536
537.Lld_table:
538	data8	@pcrel(.Lld_void)		// FFI_TYPE_VOID
539	data8	@pcrel(.Lld_int)		// FFI_TYPE_INT
540	data8	@pcrel(.Lld_float)		// FFI_TYPE_FLOAT
541	data8	@pcrel(.Lld_double)		// FFI_TYPE_DOUBLE
542	data8	@pcrel(.Lld_ldouble)		// FFI_TYPE_LONGDOUBLE
543	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT8
544	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT8
545	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT16
546	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT16
547	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT32
548	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT32
549	data8	@pcrel(.Lld_int)		// FFI_TYPE_UINT64
550	data8	@pcrel(.Lld_int)		// FFI_TYPE_SINT64
551	data8	@pcrel(.Lld_void)		// FFI_TYPE_STRUCT
552	data8	@pcrel(.Lld_int)		// FFI_TYPE_POINTER
553	data8 	@pcrel(.Lld_small_struct)	// FFI_IA64_TYPE_SMALL_STRUCT
554	data8	@pcrel(.Lld_hfa_float)		// FFI_IA64_TYPE_HFA_FLOAT
555	data8	@pcrel(.Lld_hfa_double)		// FFI_IA64_TYPE_HFA_DOUBLE
556	data8	@pcrel(.Lld_hfa_ldouble)	// FFI_IA64_TYPE_HFA_LDOUBLE
557
558#if defined __ELF__ && defined __linux__
559	.section	.note.GNU-stack,"",@progbits
560#endif
561