tile.S revision 457ba79995d512b9e8c07061fe10d4cd88273b23
1/* -----------------------------------------------------------------------
2   tile.S - Copyright (c) 2011 Tilera Corp.
3
4   Tilera TILEPro and TILE-Gx Foreign Function Interface
5
6   Permission is hereby granted, free of charge, to any person obtaining
7   a copy of this software and associated documentation files (the
8   ``Software''), to deal in the Software without restriction, including
9   without limitation the rights to use, copy, modify, merge, publish,
10   distribute, sublicense, and/or sell copies of the Software, and to
11   permit persons to whom the Software is furnished to do so, subject to
12   the following conditions:
13
14   The above copyright notice and this permission notice shall be included
15   in all copies or substantial portions of the Software.
16
17   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
18   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24   DEALINGS IN THE SOFTWARE.
25   ----------------------------------------------------------------------- */
26
27#define LIBFFI_ASM
28#include <fficonfig.h>
29#include <ffi.h>
30
31/* Number of bytes in a register. */
/* FFI_SIZEOF_ARG is not defined in this file; presumably it is supplied
   by the headers included above. */
32#define REG_SIZE FFI_SIZEOF_ARG
33
34/* Number of bytes in stack linkage area for backtracing.
35
36   A note about the ABI: on entry to a procedure, sp points to a stack
37   slot where it must spill the return address if it's not a leaf.
38   REG_SIZE bytes beyond that is a slot owned by the caller which
39   contains the sp value that the caller had when it was originally
40   entered (i.e. the caller's frame pointer). */
41#define LINKAGE_SIZE (2 * REG_SIZE)
42
43/* The first 10 registers are used to pass arguments and return values. */
44#define NUM_ARG_REGS 10
45
/* Mnemonic wrappers so the routines below can be written once for both
   targets; __tilegx__ selects the first set, anything else the second.

     SW ADDR, VAL    store register VAL to the address held in ADDR
     LW DST, ADDR    load register DST from the address held in ADDR
     BGZT REG, LABEL branch to LABEL while REG is still positive (going
                     by the mnemonic; it closes the stack-argument copy
                     loop in ffi_call_tile)

   Every SW/LW in this file moves one slot and steps its pointer by
   REG_SIZE, so the access width is expected to equal REG_SIZE. */
46#ifdef __tilegx__
47#define SW st
48#define LW ld
49#define BGZT bgtzt
50#else
51#define SW sw
52#define LW lw
53#define BGZT bgzt
54#endif
55
56
57/* void ffi_call_tile (int_reg_t reg_args[NUM_ARG_REGS],
58                       const int_reg_t *stack_args,
59                       unsigned long stack_args_bytes,
60                       void (*fnaddr)(void));
61
62        On entry, REG_ARGS contain the outgoing register values,
63        and STACK_ARGS contains STACK_ARG_BYTES of additional values
64        to be passed on the stack. If STACK_ARG_BYTES is zero, then
65        STACK_ARGS is ignored.
66
67        When the invoked function returns, the values of r0-r9 are
68        blindly stored back into REG_ARGS for the caller to examine. */
/* Naming note: STACK_ARGS / STACK_ARG_BYTES in the text above are the
   prototype's stack_args / stack_args_bytes parameters (r1 / r2 below).

   Frame built by this routine.  r52 holds the incoming sp once the
   frame is pushed; the frame size is
   STACK_ARG_BYTES + 2 * REG_SIZE + LINKAGE_SIZE, rounded up to a
   multiple of 8:

     r52 + 0               return address (lr)
     r52 - REG_SIZE        caller's r52
     r52 - 2 * REG_SIZE    REG_ARGS pointer, needed again after the call
     sp  + LINKAGE_SIZE    first outgoing stack argument
     sp  + REG_SIZE        incoming sp (linkage slot)
     sp  + 0               not written here

   FFI_HIDDEN is not defined in this file; presumably it comes from the
   headers included at the top. */
69
70        .section .text.ffi_call_tile, "ax", @progbits
71        .align  8
72        .globl  ffi_call_tile
73        FFI_HIDDEN(ffi_call_tile)
74ffi_call_tile:
75
76/* Incoming arguments. */
77#define REG_ARGS                r0
78#define INCOMING_STACK_ARGS     r1
79#define STACK_ARG_BYTES         r2
80#define ORIG_FNADDR             r3
81
82/* Temporary values. */
/* All scratch values live in r10-r16, outside the r0-r9 argument
   registers, so loading the outgoing arguments does not disturb them. */
83#define FRAME_SIZE              r10
84#define TMP                     r11
85#define TMP2                    r12
86#define OUTGOING_STACK_ARGS     r13
87#define REG_ADDR_PTR            r14
88#define RETURN_REG_ADDR         r15
89#define FNADDR                  r16
90
91        .cfi_startproc
92        {
93         /* Save return address. */
94         SW     sp, lr
95         .cfi_offset lr, 0
96         /* Prepare to spill incoming r52. */
97         addi   TMP, sp, -REG_SIZE
98         /* Increase frame size to have room to spill r52 and REG_ARGS.
99            The +7 is to round up mod 8. */
100         addi   FRAME_SIZE, STACK_ARG_BYTES, \
101                REG_SIZE + REG_SIZE + LINKAGE_SIZE + 7
102        }
103        {
104         /* Round stack frame size to a multiple of 8 to satisfy ABI. */
105         andi   FRAME_SIZE, FRAME_SIZE, -8
106         /* Compute where to spill REG_ARGS value. */
107         addi   TMP2, sp, -(REG_SIZE * 2)
108        }
109        {
110         /* Spill incoming r52. */
111         SW     TMP, r52
112         .cfi_offset r52, -REG_SIZE
113         /* Set up our frame pointer. */
114         move   r52, sp
115         .cfi_def_cfa_register r52
116         /* Push stack frame. */
117         sub    sp, sp, FRAME_SIZE
118        }
119        {
120         /* Prepare to set up stack linkage. */
121         addi   TMP, sp, REG_SIZE
122         /* Prepare to memcpy stack args. */
123         addi   OUTGOING_STACK_ARGS, sp, LINKAGE_SIZE
124         /* Save REG_ARGS which we will need after we call the subroutine. */
125         SW     TMP2, REG_ARGS
126        }
127        {
128         /* Set up linkage info to hold incoming stack pointer. */
129         SW     TMP, r52
130        }
131        {
132         /* Skip stack args memcpy if we don't have any stack args (common). */
133         blezt  STACK_ARG_BYTES, .Ldone_stack_args_memcpy
134        }
135
        /* Copy loop: one REG_SIZE slot per iteration.  It is entered only
           with a positive STACK_ARG_BYTES and repeats while the remaining
           count is still positive, so a byte count that is not a multiple
           of REG_SIZE is in effect rounded up to whole slots. */
136.Lmemcpy_stack_args:
137        {
138         /* Load incoming argument from stack_args. */
139         LW     TMP, INCOMING_STACK_ARGS
140         addi   INCOMING_STACK_ARGS, INCOMING_STACK_ARGS, REG_SIZE
141        }
142        {
143         /* Store stack argument into outgoing stack argument area. */
144         SW     OUTGOING_STACK_ARGS, TMP
145         addi   OUTGOING_STACK_ARGS, OUTGOING_STACK_ARGS, REG_SIZE
146         addi   STACK_ARG_BYTES, STACK_ARG_BYTES, -REG_SIZE
147        }
148        {
149         BGZT   STACK_ARG_BYTES, .Lmemcpy_stack_args
150        }
151.Ldone_stack_args_memcpy:
152
        /* r0 is both the REG_ARGS pointer and the first outgoing argument,
           and r3 is both ORIG_FNADDR and the fourth one.  Both are copied
           aside (REG_ADDR_PTR, FNADDR) before the argument loads below
           overwrite them. */
153        {
154         /* Copy aside ORIG_FNADDR so we can overwrite its register. */
155         move   FNADDR, ORIG_FNADDR
156         /* Prepare to load argument registers. */
157         addi   REG_ADDR_PTR, r0, REG_SIZE
158         /* Load outgoing r0. */
159         LW     r0, r0
160        }
161
162        /* Load up argument registers from the REG_ARGS array. */
        /* LOAD_REG(REG, PTR): load REG from the slot PTR points at and
           advance PTR by REG_SIZE, both in one bundle.  ffi_closure_tile
           further down in this file uses the same macro. */
163#define LOAD_REG(REG, PTR) \
164        { \
165         LW     REG, PTR ; \
166         addi   PTR, PTR, REG_SIZE \
167        }
168
169        LOAD_REG(r1, REG_ADDR_PTR)
170        LOAD_REG(r2, REG_ADDR_PTR)
171        LOAD_REG(r3, REG_ADDR_PTR)
172        LOAD_REG(r4, REG_ADDR_PTR)
173        LOAD_REG(r5, REG_ADDR_PTR)
174        LOAD_REG(r6, REG_ADDR_PTR)
175        LOAD_REG(r7, REG_ADDR_PTR)
176        LOAD_REG(r8, REG_ADDR_PTR)
177        LOAD_REG(r9, REG_ADDR_PTR)
178
179        {
180         /* Call the subroutine. */
181         jalr   FNADDR
182        }
183
        /* Everything below is addressed relative to r52, so this code
           relies on the callee having returned with r52 unchanged. */
184        {
185         /* Restore original lr. */
186         LW     lr, r52
187         /* Prepare to recover ARGS, which we spilled earlier. */
188         addi   TMP, r52, -(2 * REG_SIZE)
189        }
190        {
191         /* Restore ARGS, so we can fill it in with the return regs r0-r9. */
192         LW     RETURN_REG_ADDR, TMP
193         /* Prepare to restore original r52. */
194         addi   TMP, r52, -REG_SIZE
195        }
196
197        {
198         /* Pop stack frame. */
199         move   sp, r52
200         /* Restore original r52. */
201         LW     r52, TMP
202        }
203
        /* STORE_REG(REG, PTR): store REG to the slot PTR points at and
           advance PTR by REG_SIZE, both in one bundle.  ffi_closure_tile
           further down in this file uses the same macro. */
204#define STORE_REG(REG, PTR) \
205        { \
206         SW     PTR, REG ; \
207         addi   PTR, PTR, REG_SIZE \
208        }
209
210        /* Return all register values by reference. */
        /* sp, r52 and lr are already back to their entry values here; only
           RETURN_REG_ADDR is still needed to walk the REG_ARGS array. */
211        STORE_REG(r0, RETURN_REG_ADDR)
212        STORE_REG(r1, RETURN_REG_ADDR)
213        STORE_REG(r2, RETURN_REG_ADDR)
214        STORE_REG(r3, RETURN_REG_ADDR)
215        STORE_REG(r4, RETURN_REG_ADDR)
216        STORE_REG(r5, RETURN_REG_ADDR)
217        STORE_REG(r6, RETURN_REG_ADDR)
218        STORE_REG(r7, RETURN_REG_ADDR)
219        STORE_REG(r8, RETURN_REG_ADDR)
220        STORE_REG(r9, RETURN_REG_ADDR)
221
222        {
223         jrp    lr
224        }
225
226        .cfi_endproc
227        .size ffi_call_tile, .-ffi_call_tile
228
229/* ffi_closure_tile(...)
230
231   On entry, lr points to the closure plus 8 bytes, and r10
232   contains the actual return address.
233
234   This function simply dumps all register parameters into a stack array
235   and passes the closure, the registers array, and the stack arguments
236   to C code that does all of the actual closure processing. */
/* The entry state described above is the one the non-__tilegx__ path
   below expects.  Under __tilegx__ the code instead saves lr itself as
   the return address and takes the closure pointer from r11.

   Arguments handed to ffi_closure_tile_inner (not defined in this file):
     r0   pointer to the closure
     r1   sp + LINKAGE_SIZE, the array holding the spilled r0-r9
     r2   incoming sp + LINKAGE_SIZE, the extra stack arguments (if any)

   On return from that call r0-r9 are reloaded from the NUM_ARG_REGS
   slots that directly follow the spilled array. */
237
238        .section .text.ffi_closure_tile, "ax", @progbits
239        .align  8
240        .globl  ffi_closure_tile
241        FFI_HIDDEN(ffi_closure_tile)
242
243        .cfi_startproc
244/* Room to spill all NUM_ARG_REGS incoming registers, plus frame linkage. */
/* The factor of 2 covers a second array of NUM_ARG_REGS slots, placed
   right after the spill area, from which r0-r9 are reloaded before
   returning.  The total is rounded up to a multiple of 8.

     incoming sp + 0                               return address
     sp + LINKAGE_SIZE + NUM_ARG_REGS * REG_SIZE   r0-r9 reloaded on return
     sp + LINKAGE_SIZE                             r0-r9 as received
     sp + REG_SIZE                                 incoming sp (linkage) */
245#define CLOSURE_FRAME_SIZE (((NUM_ARG_REGS * REG_SIZE * 2 + LINKAGE_SIZE) + 7) & -8)
246ffi_closure_tile:
247        {
248#ifdef __tilegx__
249         st     sp, lr
250         .cfi_offset lr, 0
251#else
252         /* Save return address (in r10 due to closure stub wrapper). */
253         SW     sp, r10
254         .cfi_return_column r10
255         .cfi_offset r10, 0
256#endif
257         /* Compute address for stack frame linkage. */
         /* r10 is reused as scratch from here on; on the non-__tilegx__
            path its incoming value is stored by the SW above. */
258         addli   r10, sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
259        }
260        {
261         /* Save incoming stack pointer in linkage area. */
262         SW     r10, sp
263         .cfi_offset sp, -(CLOSURE_FRAME_SIZE - REG_SIZE)
264         /* Push a new stack frame. */
265         addli   sp, sp, -CLOSURE_FRAME_SIZE
266         .cfi_adjust_cfa_offset CLOSURE_FRAME_SIZE
267        }
268
269        {
270         /* Create pointer to where to start spilling registers. */
271         addi   r10, sp, LINKAGE_SIZE
272        }
273
274        /* Spill all the incoming registers. */
        /* STORE_REG is defined earlier in this file, inside ffi_call_tile:
           it stores the register and advances r10 by REG_SIZE. */
275        STORE_REG(r0, r10)
276        STORE_REG(r1, r10)
277        STORE_REG(r2, r10)
278        STORE_REG(r3, r10)
279        STORE_REG(r4, r10)
280        STORE_REG(r5, r10)
281        STORE_REG(r6, r10)
282        STORE_REG(r7, r10)
283        STORE_REG(r8, r10)
284        {
285         /* Save r9. */
         /* Last slot: stored without advancing r10, which leaves room in
            this bundle to start setting up the C call's arguments. */
286         SW     r10, r9
287#ifdef __tilegx__
288         /* Pointer to closure is passed in r11. */
289         move  r0, r11
290#else
291         /* Compute pointer to the closure object. Because the closure
292            starts with a "jal ffi_closure_tile", we can just take the
293            value of lr (a phony return address pointing into the closure)
294            and subtract 8. */
295         addi   r0, lr, -8
296#endif
297         /* Compute a pointer to the register arguments we just spilled. */
298         addi   r1, sp, LINKAGE_SIZE
299        }
300        {
301         /* Compute a pointer to the extra stack arguments (if any). */
         /* sp + CLOSURE_FRAME_SIZE is the incoming sp, so this skips the
            caller-side linkage area sitting at the incoming sp. */
302         addli   r2, sp, CLOSURE_FRAME_SIZE + LINKAGE_SIZE
303         /* Call C code to deal with all of the grotty details. */
304         jal    ffi_closure_tile_inner
305        }
306        {
         /* Address of the slot at the incoming sp, where the return
            address was saved on entry. */
307         addli   r10, sp, CLOSURE_FRAME_SIZE
308        }
309        {
310         /* Restore the return address. */
311         LW     lr, r10
312         /* Compute pointer to registers array. */
         /* This is the second array, NUM_ARG_REGS slots past the spilled
            incoming registers. */
313         addli   r10, sp, LINKAGE_SIZE + (NUM_ARG_REGS * REG_SIZE)
314        }
315        /* Return all the register values, which C code may have set. */
316        LOAD_REG(r0, r10)
317        LOAD_REG(r1, r10)
318        LOAD_REG(r2, r10)
319        LOAD_REG(r3, r10)
320        LOAD_REG(r4, r10)
321        LOAD_REG(r5, r10)
322        LOAD_REG(r6, r10)
323        LOAD_REG(r7, r10)
324        LOAD_REG(r8, r10)
325        LOAD_REG(r9, r10)
326        {
327         /* Pop the frame. */
328         addli   sp, sp, CLOSURE_FRAME_SIZE
329         jrp    lr
330        }
331
332        .cfi_endproc
333        .size   ffi_closure_tile, . - ffi_closure_tile
334
335
336/* What follows are code template instructions that get copied to the
337   closure trampoline by ffi_prep_closure_loc.  The zeroed operands
338   get replaced by their proper values at runtime. */
/* These bundles are templates, not code that runs in place: changing an
   instruction, its operands or the bundle layout changes what gets
   copied and patched.  ffi_prep_closure_loc is not part of this file.

   What each variant hands to ffi_closure_tile, matching that routine's
   entry code:
     __tilegx__   r11 = closure address, r10 = address of
                  ffi_closure_tile; lr is not written by the template.
     otherwise    r10 = the original lr (the real return address); the
                  jal then replaces lr, and ffi_closure_tile recovers the
                  closure address from it as lr - 8. */
339
340        .section .text.ffi_template_tramp_tile, "ax", @progbits
341        .align  8
342        .globl  ffi_template_tramp_tile
343        FFI_HIDDEN(ffi_template_tramp_tile)
344ffi_template_tramp_tile:
345#ifdef __tilegx__
346        {
347          moveli r11, 0 /* backpatched to address of containing closure. */
348          moveli r10, 0 /* backpatched to ffi_closure_tile. */
349        }
350        /* Note: the following bundle gets generated multiple times
351           depending on the pointer value (esp. useful for -m32 mode). */
        /* Presumably each copy of this bundle shifts another 16-bit piece
           of the two addresses into r11 and r10 -- confirm against the
           patching code in ffi_prep_closure_loc. */
352        { shl16insli r11, r11, 0 ; shl16insli r10, r10, 0 }
353        { info 2+8 /* for backtracer: -> pc in lr, frame size 0 */ ; jr r10 }
354#else
355        /* 'jal .' yields a PC-relative offset of zero so we can OR in the
356           right offset at runtime. */
357        { move r10, lr ; jal . /* ffi_closure_tile */ }
358#endif
359
360        .size   ffi_template_tramp_tile, . - ffi_template_tramp_tile
361