1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * JNI method invocation.  This is used to call a C/C++ JNI method.  The
19 * argument list has to be pushed onto the native stack according to
20 * local calling conventions.
21 *
22 * This version supports the "new" ARM EABI.
23 */
24
25#include <machine/cpu-features.h>
26
27#ifdef __ARM_EABI__
28
/*
 * DBG prefixes the optional sanity-check instructions in this file.  When
 * EXTENDED_EABI_DEBUG is defined it expands to nothing, so the prefixed
 * instruction is assembled.  Otherwise it expands to "@", which starts an
 * assembler comment and makes the rest of the line disappear.
 */
#ifndef EXTENDED_EABI_DEBUG
# define DBG @
#else
# define DBG
#endif
34
35
36/*
37Function prototype:
38
39void dvmPlatformInvoke(void* pEnv, ClassObject* clazz, int argInfo, int argc,
40    const u4* argv, const char* signature, void* func, JValue* pReturn)
41
42The method we are calling has the form:
43
44  return_type func(JNIEnv* pEnv, ClassObject* clazz, ...)
45    -or-
46  return_type func(JNIEnv* pEnv, Object* this, ...)
47
48We receive a collection of 32-bit values which correspond to arguments from
49the interpreter (e.g. float occupies one, double occupies two).  It's up to
50us to convert these into local calling conventions.
51*/
52
53/*
54ARM EABI notes:
55
56r0-r3 hold first 4 args to a method
57r9 is given special treatment in some situations, but not for us
58r10 (sl) seems to be generally available
59r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
60r12 (ip) is scratch -- not preserved across method calls
61r13 (sp) should be managed carefully in case a signal arrives
62r14 (lr) must be preserved
63r15 (pc) can be tinkered with directly
64
65r0 holds returns of <= 4 bytes
66r0-r1 hold returns of 8 bytes, low word in r0
67
68Callee must save/restore r4+ (except r12) if it modifies them.
69
70Stack is "full descending".  Only the arguments that don't fit in the first 4
71registers are placed on the stack.  "sp" points at the first stacked argument
72(i.e. the 5th arg).
73
74VFP: single-precision results in s0, double-precision results in d0.
75
76In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
7764-bit quantities (long long, double) must be 64-bit aligned.  This means
78we have to scan the method signature, identify arguments that must be
79padded, and fix them up appropriately.
80*/
81
82    .text
83    .align  2
84    .global dvmPlatformInvoke
85    .type   dvmPlatformInvoke, %function
86
/*
 * dvmPlatformInvoke: copy the interpreter's 32-bit argument words into the
 * registers / stack slots required by the ARM EABI, call the native
 * function, and store its result through pReturn.
 *
 * On entry:
 *   r0  JNIEnv (can be left alone)
 *   r1  clazz (NULL for virtual method calls, non-NULL for static)
 *   r2  arg info
 *   r3  argc (number of 32-bit values in argv)
 *   [sp]     argv
 *   [sp,#4]  short signature
 *   [sp,#8]  func
 *   [sp,#12] pReturn
 *
 * For a virtual method call, the "this" reference is in argv[0].
 *
 * argInfo (32-bit int) layout:
 *   SRRRLLLL FFFFFFFF FFFFFFFF FFFFFFFF
 *
 *   S - if set, do things the hard way (scan the signature)
 *   R - return-type enumeration, really only important for "hard" FP ABI
 *   L - number of double-words of storage required on stack (0-30 words)
 *   F - pad flag -- if set, write a pad word to the stack
 *
 * With this arrangement we can efficiently push up to 24 words of arguments
 * onto the stack.  Anything requiring more than that -- which should happen
 * rarely to never -- can do the slow signature scan.
 *
 * (We could pack the Fs more efficiently -- we know we never push two pads
 * in a row, and the first word can never be a pad -- but there's really
 * no need for it.)
 *
 * On return, r0/r1 have been written to *pReturn unless the return type
 * field of argInfo is zero (DALVIK_JNI_RETURN_VOID).  r6-r9, fp and sp are
 * restored to their entry values.
 *
 * NOTE: if the called function has more than 4 words of arguments, gdb
 * will not be able to unwind the stack past this method.  The only way
 * around this is to convince gdb to respect an explicit frame pointer.
 * The stack unwinder in debuggerd *does* pay attention to fp if we set it
 * up appropriately, so at least that will work.
 */
dvmPlatformInvoke:
    .fnstart

    /*
     * Save regs.
     *
     * On entry to a function, "sp" must be 64-bit aligned.  This means
     * we have to adjust sp manually if we push an odd number of regs here
     * (both here and when exiting).
     *
     * The ARM spec doesn't specify anything about the frame pointer.  gcc
     * points fp at the first saved register, so our "full descending"
     * stack looks like:
     *
     *  pReturn
     *  func
     *  shorty
     *  argv        <-- sp on entry
     *  lr          <-- fp
     *  fp
     *  r9...r7
     *  r6          <-- sp after reg save
     *
     * Any arguments that need to be pushed on for the target method
     * come after this.  The last argument is pushed first.
     */
SAVED_REG_COUNT = 6                     @ push 6 regs
FP_STACK_OFFSET = (SAVED_REG_COUNT-1) * 4 @ offset between fp and post-save sp
FP_ADJ = 4                              @ entry sp = fp + FP_ADJ

    .save        {r6, r7, r8, r9, fp, lr}
    stmfd   sp!, {r6, r7, r8, r9, fp, lr}

    .setfp  fp, sp, #FP_STACK_OFFSET    @ point fp at first saved reg
    add     fp, sp, #FP_STACK_OFFSET

    @.pad    #4                          @ adjust for 64-bit align
    @sub     sp, sp, #4                  @ (if we save odd number of regs)

    @ Ensure 64-bit alignment.  EABI guarantees sp is aligned on entry, make
    @ sure we're aligned properly now.
DBG tst     sp, #4                      @ 64-bit aligned?
DBG bne     dvmAbort                    @ no, fail

    ldr     r9, [fp, #0+FP_ADJ]         @ r9<- argv
    cmp     r1, #0                      @ calling a static method?

    @ Not static, grab the "this" pointer.  Note "this" is not explicitly
    @ described by the method signature.
    subeq   r3, r3, #1                  @ argc--
    ldreq   r1, [r9], #4                @ r1<- *argv++

    @ Do we have arg padding flags in "argInfo"? (just need to check hi bit)
    teq     r2, #0
    bmi     .Lno_arg_info

    /*
     * "Fast" path.
     *
     * Make room on the stack for the arguments and copy them over,
     * inserting pad words when appropriate.
     *
     * Currently:
     *  r0  don't touch
     *  r1  don't touch
     *  r2  arg info
     *  r3  argc
     *  r4-r5  don't touch (not saved)
     *  r6-r8 (available)
     *  r9  argv
     *  fp  frame pointer
     */
.Lhave_arg_info:
    @ Expand the stack by the specified amount.  We want to extract the
    @ count of double-words from r2, multiply it by 8, and subtract that
    @ from the stack pointer.
    and     ip, r2, #0x0f000000         @ ip<- double-words required
    mov     r6, r2, lsr #28             @ r6<- return type (S bit is clear here)
    sub     sp, sp, ip, lsr #21         @ shift right 24, then left 3
    mov     r8, sp                      @ r8<- sp  (arg copy dest)

    @ Stick argv in r7 and advance it past the argv values that will be
    @ held in r2-r3.  It's possible r3 will hold a pad, so check the
    @ bit in r2.  We do this by ignoring the first bit (which would
    @ indicate a pad in r2) and shifting the second into the carry flag.
    @ If the carry is set, r3 will hold a pad, so we adjust argv less.
    @
    @ (This is harmless if argc==0)
    mov     r7, r9
    movs    r2, r2, lsr #2
    addcc   r7, r7, #8                  @ skip past 2 words, for r2 and r3
    subcc   r3, r3, #2
    addcs   r7, r7, #4                  @ skip past 1 word, for r2
    subcs   r3, r3, #1

.Lfast_copy_loop:
    @ if (--argc < 0) goto invoke
    subs    r3, r3, #1
    bmi     .Lcopy_done                 @ NOTE: expects original argv in r9

.Lfast_copy_loop2:
    @ Get pad flag into carry bit.  If it's set, we don't pull a value
    @ out of argv.
    movs    r2, r2, lsr #1
    ldrcc   ip, [r7], #4                @ ip = *r7++ (pull from argv)
    strcc   ip, [r8], #4                @ *r8++ = ip (write to stack)
    bcc     .Lfast_copy_loop

DBG movcs   ip, #-3                     @ DEBUG DEBUG - make pad word obvious
DBG strcs   ip, [r8]                    @ DEBUG DEBUG
    add     r8, r8, #4                  @ if pad, just advance r8 without store
    b       .Lfast_copy_loop2           @ don't adjust argc after writing pad


.Lcopy_done:
    /*
     * Currently:
     *  r0-r3  args (JNIEnv*, thisOrClass, arg0, arg1)
     *  r6  return type (enum DalvikJniReturnType)
     *  r9  original argv
     *  fp  frame pointer
     *
     * The stack copy is complete.  Grab the first two words off of argv
     * and tuck them into r2/r3.  If the first arg is 32-bit and the second
     * arg is 64-bit, then r3 "holds" a pad word and the load is unnecessary
     * but harmless.
     *
     * If there are 0 or 1 arg words in argv, we will be loading uninitialized
     * data into the registers, but since nothing tries to use it it's also
     * harmless (assuming argv[0] and argv[1] point to valid memory, which
     * is a reasonable assumption for Dalvik's interpreted stacks).
     */
    ldmia   r9, {r2-r3}                 @ r2/r3<- argv[0]/argv[1]

    ldr     ip, [fp, #8+FP_ADJ]         @ ip<- func
#ifdef __ARM_HAVE_BLX
    blx     ip                          @ call func
#else
    mov     lr, pc                      @ call func the old-fashioned way
    bx      ip
#endif

    @ We're back, result is in r0 or (for long/double) r0-r1.
    @
    @ In theory, we need to use the "return type" arg to figure out what
    @ we have and how to return it.  However, unless we have an FPU and
    @ "hard" fp calling conventions, all we need to do is copy r0-r1 into
    @ the JValue union.
    @
    @ Thought: could redefine DalvikJniReturnType such that single-word
    @ and double-word values occupy different ranges; simple comparison
    @ allows us to choose between str and stm.  Probably not worthwhile.
    @
    cmp     r6, #0                      @ DALVIK_JNI_RETURN_VOID?
    ldrne   ip, [fp, #12+FP_ADJ]        @ pReturn
    sub     sp, fp, #FP_STACK_OFFSET    @ restore sp to post-reg-save offset
    stmneia ip, {r0-r1}                 @ pReturn->j <- r0/r1

    @ Restore the registers we saved and return.  On >= ARMv5TE we can
    @ restore PC directly from the saved LR.
#ifdef __ARM_HAVE_PC_INTERWORK
    ldmfd   sp!, {r6, r7, r8, r9, fp, pc}
#else
    ldmfd   sp!, {r6, r7, r8, r9, fp, lr}
    bx      lr
#endif



    /*
     * "Slow" path.
     * Walk through the argument list, counting up the number of 32-bit words
     * required to contain it.  Then walk through it a second time, copying
     * values out to the stack.  (We could pre-compute the size to save
     * ourselves a trip, but we'd have to store that somewhere -- this is
     * sufficiently unlikely that it's not worthwhile.)
     *
     * Try not to make any assumptions about the number of args -- I think
     * the class file format allows up to 64K words (need to verify that).
     *
     * Currently:
     *  r0  don't touch
     *  r1  don't touch
     *  r2  arg info (becomes the word count / scratch)
     *  r3  argc (not needed here; reused to carry the return type)
     *  r4-r5 don't touch (not saved)
     *  r6-r8 (available)
     *  r9  argv
     *  fp  frame pointer
     *
     * r6 serves as the signature cursor below, so the return type is parked
     * in r3 and moved into r6 at .Lslow_copy_done, which is the only way
     * this path reaches .Lcopy_done.
     */
.Lno_arg_info:
    mov     r3, r2, lsr #28             @ r3<- S bit + return type
    and     r3, r3, #7                  @ r3<- return type (S bit masked off)
    ldr     r6, [fp, #4+FP_ADJ]         @ r6<- short signature
    add     r6, r6, #1                  @ advance past return type
    mov     r2, #0                      @ r2<- word count, init to zero

.Lcount_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #0                      @ end?
    beq     .Lcount_done                @ all done, bail
    add     r2, r2, #1                  @ count++
    cmp     ip, #'D'                    @ look for 'D' or 'J', which are 64-bit
    cmpne   ip, #'J'
    bne     .Lcount_loop

    @ 64-bit value, insert padding if it would start on an odd word
    tst     r2, #1                      @ odd after initial incr?
    addne   r2, #1                      @ odd: was aligned, add 1 more for 64 bits
    addeq   r2, #2                      @ even: initial incr was the pad, add 2 now
    b       .Lcount_loop
.Lcount_done:

    @ We have the padded-out word count in r2.  We subtract 2 from it
    @ because we don't push the first two arg words on the stack (they're
    @ destined for r2/r3).  Pushing them on and popping them off would be
    @ simpler but slower.
    subs    r2, r2, #2                  @ subtract 2 (for contents of r2/r3)
    movmis  r2, #0                      @ if negative, peg at zero, set Z-flag
    beq     .Lslow_copy_done            @ zero args, skip stack copy

DBG tst     sp, #7                      @ DEBUG - make sure sp is aligned now
DBG bne     dvmAbort                    @ DEBUG

    @ Set up to copy from r7 to r8.  We copy from the second arg to the
    @ last arg, which means reading and writing to ascending addresses.
    sub     sp, sp, r2, asl #2          @ sp<- sp - r2*4
    bic     sp, #4                      @ drop another 4 if needed to stay 64-bit aligned
    mov     r7, r9                      @ r7<- argv
    mov     r8, sp                      @ r8<- sp

    @ We need to copy words from [r7] to [r8].  We walk forward through
    @ the signature again, "copying" pad words when appropriate, storing
    @ upward into the stack.
    ldr     r6, [fp, #4+FP_ADJ]         @ r6<- signature
    add     r6, r6, #1                  @ advance past return type
    add     r7, r7, #8                  @ r7<- r7+8 (assume argv 0/1 in r2/r3)

    @ Eat first arg or two, for the stuff that goes into r2/r3.
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lstack_copy_loop           @ 64-bit arg fills r2+r3

    @ First arg was 32-bit, check the next
    ldrb    ip, [r6], #1                @ ip<- *signature++
    cmp     ip, #'D'
    cmpne   ip, #'J'
    subeq   r7, #4                      @ r7<- r7-4 (take it back - pad word)
    beq     .Lstack_copy_loop2          @ start with char we already have

    @ Two 32-bit args, fall through and start with next arg

.Lstack_copy_loop:
    ldrb    ip, [r6], #1                @ ip<- *signature++
.Lstack_copy_loop2:
    cmp     ip, #0                      @ end of shorty?
    beq     .Lslow_copy_done            @ yes

    cmp     ip, #'D'
    cmpne   ip, #'J'
    beq     .Lcopy64

    @ Copy a 32-bit value.  [r8] is initially at the end of the stack.  We
    @ use "full descending" stacks, so we store into [r8] and incr as we
    @ move toward the end of the arg list.
.Lcopy32:
    ldr     ip, [r7], #4
    str     ip, [r8], #4
    b       .Lstack_copy_loop

.Lcopy64:
    @ Copy a 64-bit value.  If necessary, leave a hole in the stack to
    @ ensure alignment.  We know the [r8] output area is 64-bit aligned,
    @ so we can just mask the address.
    add     r8, r8, #7          @ r8<- (r8+7) & ~7
    ldr     ip, [r7], #4
    bic     r8, r8, #7
    ldr     r2, [r7], #4
    str     ip, [r8], #4
    str     r2, [r8], #4
    b       .Lstack_copy_loop

.Lslow_copy_done:
    @ Slow-path exit.  r6 was the signature cursor; .Lcopy_done tests r6
    @ as the return type after the call, so load it from r3 first.
    mov     r6, r3                      @ r6<- return type
    b       .Lcopy_done

    .fnend
    .size   dvmPlatformInvoke, .-dvmPlatformInvoke
406
#if 0

/*
 * Debugging aid, normally compiled out.  "SQUEAK n" emits a routine named
 * common_squeak<n> that calls printf("<%d>", n) and then reloads r0-r3,
 * ip and lr from the copies it pushed on entry.  (Saving ip is not
 * actually useful; it is in the list so that an even number of registers
 * is pushed, which keeps sp 64-bit aligned as the EABI requires.)
 */
    .macro SQUEAK num
common_squeak\num:
    stmdb   sp!, {r0, r1, r2, r3, ip, lr}   @ full-descending push
    ldr     r0, strSqueak                   @ r0<- format string address
    mov     r1, #\num                       @ r1<- marker number
    bl      printf
#ifndef __ARM_HAVE_PC_INTERWORK
    ldmia   sp!, {r0, r1, r2, r3, ip, lr}
    bx      lr
#else
    ldmia   sp!, {r0, r1, r2, r3, ip, pc}   @ pop and return in one step
#endif
    .endm

    SQUEAK  0
    SQUEAK  1
    SQUEAK  2
    SQUEAK  3
    SQUEAK  4
    SQUEAK  5

    /* Literal word holding the address of the format string. */
strSqueak:
    .word   .Lsqueak_fmt
.Lsqueak_fmt:
    .asciz  "<%d>"

    .align  2

#endif
443
444#endif /*__ARM_EABI__*/
445