1/* -----------------------------------------------------------------------
2   darwin.S - Copyright (c) 2000 John Hornkvist
3	      Copyright (c) 2004, 2010 Free Software Foundation, Inc.
4
5   PowerPC Assembly glue.
6
7   Permission is hereby granted, free of charge, to any person obtaining
8   a copy of this software and associated documentation files (the
9   ``Software''), to deal in the Software without restriction, including
10   without limitation the rights to use, copy, modify, merge, publish,
11   distribute, sublicense, and/or sell copies of the Software, and to
12   permit persons to whom the Software is furnished to do so, subject to
13   the following conditions:
14
15   The above copyright notice and this permission notice shall be included
16   in all copies or substantial portions of the Software.
17
18   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
19   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
22   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24   OTHER DEALINGS IN THE SOFTWARE.
25   ----------------------------------------------------------------------- */
26
/* LIBFFI_ASM is defined before <ffi.h> is included further down.
   NOTE(review): presumably it tells the header it is being pulled into an
   assembly source so that C-only declarations are skipped - confirm in
   ffi.h.  */
27#define LIBFFI_ASM
/* MODE_CHOICE(x, y) expands to its first argument in a 32-bit build and to
   its second argument when __ppc64__ is defined.  Every size-dependent
   mnemonic and constant below is written in terms of it.  */
28#if defined(__ppc64__)
29#define MODE_CHOICE(x, y) y
30#else
31#define MODE_CHOICE(x, y) x
32#endif
33
/* CPU name for the selected mode.  NOTE(review): not referenced in the
   visible part of this file; presumably meant for a .machine directive -
   confirm.  */
34#define machine_choice	MODE_CHOICE(ppc7400,ppc64)
35
36; Define some pseudo-opcodes for size-independent load & store of GPRs ...
/* lg/sg   = load/store one GPR-sized value,
   lgu/sgu = the same with base-register update,
   sgux    = store with update, indexed (used to allocate the frame).  */
37#define lgu		MODE_CHOICE(lwzu, ldu)
38#define lg		MODE_CHOICE(lwz,ld)
39#define sg		MODE_CHOICE(stw,std)
40#define sgu		MODE_CHOICE(stwu,stdu)
41#define sgux		MODE_CHOICE(stwux,stdux)
42
43; ... and the size of GPRs and their storage indicator.
44#define GPR_BYTES	MODE_CHOICE(4,8)
45#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */
46#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */
47
48; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04.
/* LINKAGE_SIZE is the offset from the stack pointer at which the first
   GPR argument image is read back (see the lg r3..r10 sequence in
   _ffi_call_DARWIN).  PARAM_AREA equals 8 * GPR_BYTES in both modes.  */
49#define LINKAGE_SIZE	MODE_CHOICE(24,48)
50#define PARAM_AREA	MODE_CHOICE(32,64)
51#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */
52
53/* If there is any FP stuff we make space for all of the regs.  */
/* f1-f13 are transferred through 13 slots of FPR_SIZE bytes each.
   RESULT_BYTES is not referenced in the visible part of this file.  */
54#define SAVED_FPR_COUNT 13
55#define FPR_SIZE	8
56#define RESULT_BYTES	16
57
58/* This should be kept in step with the same value in ffi_darwin.c.  */
/* The four registers are r28-r31; SAVE_REGS_SIZE is the size of the area
   just below the caller's stack pointer in which they are saved.  */
59#define ASM_NEEDS_REGISTERS 4
60#define SAVE_REGS_SIZE (ASM_NEEDS_REGISTERS * GPR_BYTES)
61
62#include <fficonfig.h>
63#include <ffi.h>
64
/* Identity wrappers: both macros expand to their argument unchanged.
   L() is used below to spell the local branch labels of the return-value
   code; JUMPTARGET() is not referenced in the visible part of this file.  */
65#define JUMPTARGET(name) name
66#define L(x) x
67
68	.text
69	.align 2
70	.globl _ffi_prep_args
71
72	.align 2
73	.globl _ffi_call_DARWIN
74
/* _ffi_call_DARWIN: build an outgoing call frame, have ffi_prep_args fill
   it in, call the user function, then store its result through the
   return-value pointer.

   Frame handling, as done by the code below:
     - the incoming stack pointer is copied to r8 (later r28) and is used
       as the base ("AP") for everything this routine saves;
     - the new frame is created by adding r4 (-bytes) to r1, with the old
       r1 stored at the new r1 as the back chain;
     - LR is saved at SAVED_LR_OFFSET(AP); r28-r31 are saved in the
       SAVE_REGS_SIZE bytes immediately below AP;
     - the 13 FPR argument images are read from the 13*FPR_SIZE bytes
       immediately below the saved GPRs;
     - the ffi_type* of the return value is parked at
       (LINKAGE_SIZE + 6*GPR_BYTES)(AP) until it is needed again.

   Registers held across the calls: r28 = AP, r29 = function address,
   r30 = return-value pointer, r31 = cif flags.

   The LCFI0/LCFI1/LCFI2 labels are referenced by the unwind table at the
   end of the file, so instructions must not be moved across them.

   NOTE(review): the nop instructions scattered through the load/store
   runs are reproduced as found; presumably they are scheduling padding -
   confirm before removing any.  */
75	/* We arrive here with:
76	   r3 = ptr to extended cif.
77	   r4 = -bytes.
78	   r5 = cif flags.
79	   r6 = ptr to return value.
80	   r7 = fn pointer (user func).
81	   r8 = fn pointer (ffi_prep_args).
82	   r9 = ffi_type* for the ret val.  */
83
84_ffi_call_DARWIN:
85Lstartcode:
86	mr   	r12,r8	/* We only need r12 until the call,
87			   so it does not have to be saved.  */
88LFB1:
89	/* Save the old stack pointer as AP.  */
90	mr	r8,r1
91LCFI0:
92
93	/* Save the retval type in the parent frame.  */
94	sg	r9,(LINKAGE_SIZE+6*GPR_BYTES)(r8)
95
96	/* Allocate the stack space we need.  */
	/* r4 is negative, so this stores the old r1 at r1+r4 and sets r1 to
	   that address in a single instruction.  */
97	sgux	r1,r1,r4
98
99	/* Save registers we use.  */
	/* r9 is free for use as a scratch register: its incoming value was
	   stored to the parent frame above.  */
100	mflr	r9
101	sg	r9,SAVED_LR_OFFSET(r8)
102
103	sg	r28,-(4 * GPR_BYTES)(r8)
104	sg	r29,-(3 * GPR_BYTES)(r8)
105	sg	r30,-(2 * GPR_BYTES)(r8)
106	sg	r31,-(    GPR_BYTES)(r8)
107
108#if !defined(POWERPC_DARWIN)
109	/* The TOC slot is reserved in the Darwin ABI and r2 is volatile.  */
110	sg	r2,(5 * GPR_BYTES)(r1)
111#endif
112
113LCFI1:
114
115	/* Save arguments over call.  */
116	mr	r31,r5	/* flags,  */
117	mr	r30,r6	/* rvalue,  */
118	mr	r29,r7	/* function address,  */
119	mr	r28,r8	/* our AP.  */
120LCFI2:
121	/* Call ffi_prep_args. r3 = extended cif, r4 = stack ptr copy.  */
122	mr	r4,r1
	/* NOTE(review): r9 is cleared before the call; the reason is not
	   evident from this file - confirm against ffi_prep_args.  */
123	li	r9,0
124
125	mtctr	r12 /* r12 holds address of _ffi_prep_args.  */
126	bctrl
127
128#if !defined(POWERPC_DARWIN)
129	/* The TOC slot is reserved in the Darwin ABI and r2 is volatile.  */
130	lg     r2,(5 * GPR_BYTES)(r1)
131#endif
132	/* Now do the call.
133	   Set up cr1 with bits 4-7 of the flags.  */
134	mtcrf	0x40,r31
135	/* Get the address to call into CTR.  */
	/* (CTR is written again from r12 at L2, just before the call.)  */
136	mtctr	r29
137	/* Load all those argument registers.
138	   We have set up a nice stack frame, just load it into registers.  */
	/* r3-r10 come from eight consecutive GPR-sized slots starting at
	   LINKAGE_SIZE(r1).  */
139	lg     r3, (LINKAGE_SIZE                )(r1)
140	lg     r4, (LINKAGE_SIZE +     GPR_BYTES)(r1)
141	lg     r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r1)
142	lg     r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r1)
143	nop
144	lg     r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r1)
145	lg     r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r1)
146	lg     r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r1)
147	lg     r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r1)
148
149L1:
150	/* ... Load all the FP registers.  */
	/* CR bit 6 lies in cr1, which was loaded from the flags above.  When
	   it is set, f1-f13 are read from 13 consecutive FPR_SIZE slots that
	   end where the saved r28-r31 begin (addressed from r28 = AP).  */
151	bf	6,L2	/* No floats to load.  */
152	lfd	f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28)
153	lfd	f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28)
154	lfd	f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28)
155	lfd	f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28)
156	nop
157	lfd	f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28)
158	lfd	f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28)
159	lfd	f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28)
160	lfd	f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28)
161	nop
162	lfd     f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28)
163	lfd     f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28)
164	lfd     f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28)
165	lfd     f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28)
166	nop
167	lfd     f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28)
168
169L2:
170	mr	r12,r29	/* Put the target address in r12 as specified.  */
171	mtctr  	r12
172	nop
173	nop
174
175	/* Make the call.  */
176	bctrl
177
178	/* Now, deal with the return value.  */
179
180	/* m64 structure returns can occupy the same set of registers as
181	   would be used to pass such a structure as arg0 - so take care
182	   not to step on any possibly hot regs.  */
183
184	/* Get the flags.. */
	/* Mask 0x03 selects CR fields 6 and 7, i.e. CR bits 24-31; the bit
	   numbers tested below (27-31) all fall in that range.  */
185	mtcrf	0x03,r31 ; we need cr6 & cr7 now.
186	; FLAG_RETURNS_NOTHING also covers struct ret-by-ref.
187	bt	30,L(done_return_value)	  ; FLAG_RETURNS_NOTHING
188	bf	27,L(scalar_return_value) ; not FLAG_RETURNS_STRUCT
189
190	/* OK, so we have a struct.  */
191#if defined(__ppc64__)
192	bt	31,L(maybe_return_128) ; FLAG_RETURNS_128BITS, special case
193
194	/* OK, we have to map the return back to a mem struct.
195	   We are about to trample the parents param area, so recover the
196	   return type.  r29 is free, since the call is done.  */
197	lg	r29,(LINKAGE_SIZE + 6 * GPR_BYTES)(r28)
198
	/* Spill r3-r10 to eight consecutive slots at LINKAGE_SIZE(AP) so the
	   register image can be copied out with memcpy.  */
199	sg	r3, (LINKAGE_SIZE                )(r28)
200	sg	r4, (LINKAGE_SIZE +     GPR_BYTES)(r28)
201	sg	r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r28)
202	sg	r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r28)
203	nop
204	sg	r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r28)
205	sg	r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r28)
206	sg	r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r28)
207	sg	r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28)
208	/* OK, so do the block move - we trust that memcpy will not trample
209	   the fprs...  */
210	mr 	r3,r30 ; dest
211	addi	r4,r28,LINKAGE_SIZE ; source
212	/* The size is a size_t, should be long.  */
	/* The size is read from offset 0 of the ffi_type that r29 points to.  */
213	lg	r5,0(r29)
214	/* Figure out small structs */
	/* For sizes up to 4 other than 3, the source is moved to
	   (spill area + 8) - size, i.e. the last `size` bytes of the first
	   8-byte slot.  Every other size copies from the start of the area.  */
215	cmpi	0,r5,4
216	bgt	L3	; 1, 2 and 4 bytes have special rules.
217	cmpi	0,r5,3
218	beq	L3	; not 3
219	addi	r4,r4,8
220	subf	r4,r5,r4
221L3:
222	bl	_memcpy
223
224	/* ... do we need the FP registers? - recover the flags.. */
225	mtcrf	0x03,r31 ; we need cr6 & cr7 now.
226	bf	29,L(done_return_value)	/* No floats in the struct.  */
	/* Dump f1-f13 into the same 13 slots the FP arguments were loaded
	   from, so they can be handed over as an array.  */
227	stfd	f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28)
228	stfd	f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28)
229	stfd	f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28)
230	stfd	f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28)
231	nop
232	stfd	f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28)
233	stfd	f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28)
234	stfd	f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28)
235	stfd	f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28)
236	nop
237	stfd	f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28)
238	stfd	f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28)
239	stfd	f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28)
240	stfd	f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28)
241	nop
242	stfd	f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28)
243
	/* Call _darwin64_struct_floats_to_mem with r3 = ffi_type*, r4 = dest,
	   r5 = address of the saved f1 slot, r6 = address of a zeroed
	   GPR-sized word (the last slot of the spill area is reused for it).
	   NOTE(review): the callee is defined elsewhere; its exact contract
	   should be checked there.  */
244	mr	r3,r29	; ffi_type *
245	mr	r4,r30	; dest
246	addi	r5,r28,-SAVE_REGS_SIZE-(13*FPR_SIZE) ; fprs
247	xor	r6,r6,r6
248	sg	r6,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28)
249	addi	r6,r28,(LINKAGE_SIZE + 7 * GPR_BYTES) ; point to a zeroed counter.
250	bl 	_darwin64_struct_floats_to_mem
251
252	b L(done_return_value)
253#else
254	stw	r3,0(r30) ; m32 the only struct return in reg is 4 bytes.
255#endif
	/* In the m64 build this branch directly follows the unconditional
	   branch above and is never reached; in the m32 build it ends the
	   struct case.  */
256	b L(done_return_value)
257
258L(fp_return_value):
259	/* Do we have long double to store?  */
	/* A 128-bit FP result is stored as f1 followed by f2.  */
260	bf	31,L(fd_return_value) ; FLAG_RETURNS_128BITS
261	stfd	f1,0(r30)
262	stfd	f2,FPR_SIZE(r30)
263	b	L(done_return_value)
264
265L(fd_return_value):
266	/* Do we have double to store?  */
	/* Bit 28 is the same flag bit tested as FLAG_RETURNS_64BITS in the
	   scalar path below.  */
267	bf	28,L(float_return_value)
268	stfd	f1,0(r30)
269	b	L(done_return_value)
270
271L(float_return_value):
272	/* We only have a float to store.  */
273	stfs	f1,0(r30)
274	b	L(done_return_value)
275
276L(scalar_return_value):
277	bt	29,L(fp_return_value)	; FLAG_RETURNS_FP
278	; ffi_arg is defined as unsigned long.
279	sg	r3,0(r30)		; Save the reg.
280	bf	28,L(done_return_value) ; not FLAG_RETURNS_64BITS
281
	/* Falling through here means a 64-bit scalar.  m64: r3 is stored
	   again as a doubleword and r4 is added only when the 128-bit flag
	   is set.  m32: r4 supplies the second word.  The m64 label is also
	   the entry point for 128-bit struct returns.  */
282#if defined(__ppc64__)
283L(maybe_return_128):
284	std	r3,0(r30)
285	bf	31,L(done_return_value) ; not FLAG_RETURNS_128BITS
286	std	r4,8(r30)
287#else
288	stw	r4,4(r30)
289#endif
290
291	/* Fall through.  */
292	/* We want this at the end to simplify eh epilog computation.  */
293
294L(done_return_value):
295	/* Restore the registers we used and return.  */
	/* r29 briefly carries the saved LR and is then reloaded with its own
	   saved value.  r28 is restored last because it is the base register
	   for all of these loads.  r1 is restored from the back chain word
	   written when the frame was allocated.  */
296	lg	r29,SAVED_LR_OFFSET(r28)
297	; epilog
298	lg	r31,-(1 * GPR_BYTES)(r28)
299	mtlr	r29
300	lg	r30,-(2 * GPR_BYTES)(r28)
301	lg	r29,-(3 * GPR_BYTES)(r28)
302	lg	r28,-(4 * GPR_BYTES)(r28)
303	lg	r1,0(r1)
304	blr
305LFE1:
306	.align	1
307/* END(_ffi_call_DARWIN)  */
308
309/* Provide a null definition of _ffi_call_AIX.  */
/* The symbol is exported and its body is a bare return: no register or
   memory is touched.  NOTE(review): presumably present only so that
   references to _ffi_call_AIX from shared C code resolve at link time -
   confirm against the callers.  */
310	.text
311	.globl _ffi_call_AIX
312	.align 2
313_ffi_call_AIX:
314	blr
315/* END(_ffi_call_AIX)  */
316
317/* EH stuff.  */
/* Hand-written unwind information for _ffi_call_DARWIN: one CIE
   (EH_frame1) followed by one FDE (_ffi_call_DARWIN.eh).  The FDE is
   keyed to the Lstartcode, LCFI0, LCFI1, LCFI2 and LFE1 labels in the
   code above and must be kept in step with the prologue there.  */
318
/* Data alignment factor as a one-byte sleb128: 0x7c encodes -4 and 0x78
   encodes -8, i.e. -GPR_BYTES for the selected mode.  */
319#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78)
320
321	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
322EH_frame1:
323	.set	L$set$0,LECIE1-LSCIE1
324	.long	L$set$0	; Length of Common Information Entry
325LSCIE1:
326	.long	0x0	; CIE Identifier Tag
327	.byte	0x1	; CIE Version
328	.ascii	"zR\0"	; CIE Augmentation
329	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
330	.byte	EH_DATA_ALIGN_FACT ; sleb128 -4 (m32) or -8 (m64); CIE Data Alignment Factor
331	.byte	0x41	; CIE RA Column
332	.byte	0x1	; uleb128 0x1; Augmentation size
333	.byte	0x10	; FDE Encoding (pcrel)
334	.byte	0xc	; DW_CFA_def_cfa
335	.byte	0x1	; uleb128 0x1 (CFA register r1)
336	.byte	0x0	; uleb128 0x0 (CFA offset 0)
337	.align	LOG2_GPR_BYTES
338LECIE1:
339
340	.globl _ffi_call_DARWIN.eh
341_ffi_call_DARWIN.eh:
342LSFDE1:
343	.set	L$set$1,LEFDE1-LASFDE1
344	.long	L$set$1	; FDE Length
345LASFDE1:
346	.long	LASFDE1-EH_frame1 ; FDE CIE offset
347	.g_long	Lstartcode-.	; FDE initial location
348	.set	L$set$3,LFE1-Lstartcode
349	.g_long	L$set$3	; FDE address range
350	.byte   0x0     ; uleb128 0x0; Augmentation size
	/* From LCFI0 on, r8 holds the entry stack pointer, so it becomes
	   the CFA register.  */
351	.byte	0x4	; DW_CFA_advance_loc4
352	.set	L$set$4,LCFI0-Lstartcode
353	.long	L$set$4
354	.byte	0xd	; DW_CFA_def_cfa_register
355	.byte	0x08	; uleb128 0x08
	/* At LCFI1 the prologue stores are done.  The factored offsets are
	   multiplied by the data alignment factor (-GPR_BYTES): the RA
	   column at -2 gives CFA+SAVED_LR_OFFSET, and r31, r30, r29, r28 at
	   1, 2, 3, 4 give CFA-1, -2, -3, -4 GPR slots, matching the sg
	   instructions in the prologue.  */
356	.byte	0x4	; DW_CFA_advance_loc4
357	.set	L$set$5,LCFI1-LCFI0
358	.long	L$set$5
359	.byte   0x11    ; DW_CFA_offset_extended_sf
360	.byte	0x41	; uleb128 0x41
361	.byte   0x7e    ; sleb128 -2
362	.byte	0x9f	; DW_CFA_offset, column 0x1f
363	.byte	0x1	; uleb128 0x1
364	.byte	0x9e	; DW_CFA_offset, column 0x1e
365	.byte	0x2	; uleb128 0x2
366	.byte	0x9d	; DW_CFA_offset, column 0x1d
367	.byte	0x3	; uleb128 0x3
368	.byte	0x9c	; DW_CFA_offset, column 0x1c
369	.byte	0x4	; uleb128 0x4
	/* From LCFI2 on, the entry stack pointer has been copied to r28
	   (0x1c), which stays valid across the calls.  */
370	.byte	0x4	; DW_CFA_advance_loc4
371	.set	L$set$6,LCFI2-LCFI1
372	.long	L$set$6
373	.byte	0xd	; DW_CFA_def_cfa_register
374	.byte	0x1c	; uleb128 0x1c
375	.align LOG2_GPR_BYTES
376LEFDE1:
377	.align 1
378
379