quick_entrypoints_x86_64.S revision ab9a0dbf3b63d517da5278b8298e6cd316e09f68
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86_64.S"
18
MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers (xmm12-xmm15, 8 bytes each = 32 bytes).
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    // Spill the four FP callee-saves into the newly reserved slots (low 64 bits of each xmm).
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO
28
MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers (xmm12-xmm15) spilled by
    // SETUP_FP_CALLEE_SAVE_FRAME, then release the 32-byte spill area.
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO
38
// For x86_64, the CFA is rsp+8, the address above the pushed return address on the stack.
40
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAll).
     * Spills callee-saved GPRs and the ART FP callee-saves, then stores the
     * kSaveAll runtime ArtMethod* at the bottom of the stack. Clobbers R10.
     */
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus padding for alignment
    subq LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    // Save FPRs (ART FP callee-saves xmm12-xmm15).
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
    subq MACRO_LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
    CFI_ADJUST_CFA_OFFSET(8)
    // R10 := ArtMethod* for save all callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
83
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
     * Frame layout (from rsp up): [Method* slot][xmm12..xmm15 spills].
     * Clobbers R10.
     */
MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus padding for alignment
    subq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4*8)
    // Save FPRs (ART FP callee-saves) above the Method* slot at 0(%rsp).
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
124
MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
    // Undo SETUP_REF_ONLY_CALLEE_SAVE_FRAME: reload the FP callee-saves,
    // drop the Method*/FPR area, then pop the GPR callee-saves in reverse order.
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
140
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
     * Spills callee-saved GPRs interleaved with the GPR argument registers,
     * then the FPR argument registers (xmm0-xmm7) and FP callee-saves
     * (xmm12-xmm15), and finally stores the kRefsAndArgs ArtMethod* at the
     * bottom of the stack. Clobbers R10.
     */
MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
    subq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Save FPRs: xmm0-xmm7 are the FP argument registers, xmm12-xmm15 the FP callee-saves.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
194
MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
    // Undo SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME, restoring argument registers
    // as well so a method call can proceed with its original arguments.
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
224
225
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_. Does not return.
     */
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
    // (Thread*, SP) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rsp, %rsi
    call PLT_SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*, SP)
    UNREACHABLE
END_MACRO
238
// Defines a no-argument throw entrypoint c_name that calls cxx_name(Thread*, SP).
// The callee throws and long-jumps, so this never returns.
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %rsp, %rsi                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
249
// Defines a one-argument throw entrypoint c_name that calls cxx_name(arg1, Thread*, SP).
// arg1 is already in rdi on entry. The callee throws and long-jumps; never returns.
MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up; arg1 stays in rdi.
    movq %rsp, %rdx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
260
// Defines a two-argument throw entrypoint c_name that calls cxx_name(arg1, arg2, Thread*, SP).
// arg1/arg2 are already in rdi/rsi on entry. The callee throws and long-jumps; never returns.
MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up; arg1/arg2 stay in rdi/rsi.
    movq %rsp, %rcx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
271
    /*
     * Called by managed code to create and deliver a NullPointerException.
     * (No explicit managed arguments.)
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     * (No explicit managed arguments.)
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     * (No explicit managed arguments.)
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
286
// On entry to this function, RAX contains the RSP value for the overflow region.
DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
    // Here, the RSP is above the protected region.  We need to create a
    // callee save frame and then move RSP down to the overflow region.
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    mov %rsp, %rsi                    // get current stack pointer, pass SP as second arg
    mov %rax, %rsp                    // move RSP to the overflow region.
    mov %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current() as first arg
    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
    int3                              // unreached
END_FUNCTION art_quick_throw_stack_overflow_from_signal
298
    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NoSuchMethodError.
     * Arg1 holds the method index.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
315
    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
     * stack and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    movl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %edx  // pass caller Method*
    movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
    movq %rsp, %r8                                         // pass SP

    call PLT_VAR(cxx_name, 1)                   // cxx_name(arg1, arg2, caller method*, Thread*, SP)
                                                           // save the code pointer
    movq %rax, %rdi                                        // rdi := target Method* (0 on failure)
    movq %rdx, %rax                                        // rax := method->code_
    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME

    testq %rdi, %rdi                                       // did the helper find a target?
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
    END_FUNCTION VAR(c_name, 0)
END_MACRO
359
// Instantiate the invoke trampolines for each invoke kind; the *_with_access_check
// variants route through helpers that also perform access checks.
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
367
368
    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     * Scans forward past non-FP shorty characters (advancing arg_array by one
     * 4-byte slot, or two for 'J'/long) until it finds a 'D' (double, load 8
     * bytes) or 'F' (float, load 4 bytes) to place into xmm_reg.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished, 1)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg, 0)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg, 0)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
399
    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     * Skips FP shorty characters ('F' advances arg_array one 4-byte slot, 'D'
     * two) until it finds a GPR value: 'J' (long) loads 8 bytes into gpr_reg64,
     * anything else loads 4 bytes into gpr_reg32.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished, 2)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32, 1)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64, 0)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
432
    /*
     * Quick invocation stub.
     * Copies the argument array into a fresh managed frame, loads GPR/XMM
     * argument registers per the shorty, calls the method's quick code, and
     * stores the return value (long/int/Object* from rax, or float/double from
     * xmm0) into the JValue* result.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // R10 := arg array size, preserved across frame setup.
    addl LITERAL(60), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                  // r8 and r9 in frame.
    andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
    subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
    subq %rdx, %rsp               // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movl LITERAL(0), (%rsp)       // Store NULL for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // RAX := method to be called
    movq %rsi, %r11               // R11 := arg_array
    leaq 4(%rsp), %rdi            // Rdi is pointing just above the StackReference<method> in the
                                  // stack arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // RDI := method to be called
    movl (%r11), %esi             // RSI := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    CFI_DEF_CFA_REGISTER(rsp)
    POP r9                        // Pop r9 - shorty*.
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)           // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)           // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
516
    /*
     * Quick invocation stub for static methods.
     * Same as art_quick_invoke_stub except there is no implicit "this" pointer:
     * the argument array is used from its start and rsi/esi is available as the
     * first GPR argument register.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or NULL if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // R10 := arg array size, preserved across frame setup.
    addl LITERAL(60), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                  // r8 and r9 in frame.
    andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
    subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
    subq %rdx, %rsp               // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movl LITERAL(0), (%rsp)       // Store NULL for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // RAX := method to be called
    movq %rsi, %r11               // R11 := arg_array
    leaq 4(%rsp), %rdi            // Rdi is pointing just above the StackReference<method> in the
                                  // stack arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // RDI := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    CFI_DEF_CFA_REGISTER(rsp)
    POP r9                        // Pop r9 - shorty*.
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)           // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)           // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
599
    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs  (saved 8-byte GPR values, laid out in pop order below: r15 first)
     *   rsi = fprs  (16 saved 8-byte FPR values: xmm0 at offset 0 ... xmm15 at 120)
     * Restores all FPRs and GPRs (including RSP), then returns to the RIP found
     * on the restored stack. Never returns to its caller.
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs by popping them off the gprs array.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
651
// Defines entrypoint c_name that calls cxx_name(Thread*, SP) with a kRefsOnly
// callee save frame; return_macro decides between returning and exception delivery.
MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
    // Outgoing argument set up
    movq %rsp, %rsi                   // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)         // cxx_name(Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)       // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
663
// Defines entrypoint c_name that calls cxx_name(arg0, Thread*, SP); arg0 is
// already in rdi. return_macro decides between returning and exception delivery.
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up; arg0 stays in rdi.
    movq %rsp, %rdx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
675
// Defines entrypoint c_name that calls cxx_name(arg0, arg1, Thread*, SP); arg0/arg1
// are already in rdi/rsi. return_macro decides between returning and exception delivery.
MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up; arg0/arg1 stay in rdi/rsi.
    movq %rsp, %rcx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)       // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
687
// Defines entrypoint c_name that calls cxx_name(arg0, arg1, arg2, Thread*, SP);
// arg0-arg2 are already in rdi/rsi/rdx. return_macro decides between returning
// and exception delivery.
MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up; arg0-arg2 stay in rdi/rsi/rdx.
    movq %rsp, %r8                     // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
699
// Like ONE_ARG_DOWNCALL but additionally passes the referrer, loaded from the
// caller's frame before the callee save frame is built.
MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    // 8(%rsp) is the 32-bit slot just above the return address — presumably the
    // caller's StackReference<ArtMethod>; must be read before pushing the frame.
    movl 8(%rsp), %esi                 // pass referrer
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    movq %rsp, %rcx                    // pass SP
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
712
// Like TWO_ARG_DOWNCALL but additionally passes the referrer, loaded from the
// caller's frame before the callee save frame is built.
MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    // Read referrer from the caller's frame before pushing our own frame.
    movl 8(%rsp), %edx                 // pass referrer
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    movq %rsp, %r8                     // pass SP
    call PLT_VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
725
// Like THREE_ARG_DOWNCALL but additionally passes the referrer, loaded from the
// caller's frame before the callee save frame is built.
MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    // Read referrer from the caller's frame before pushing our own frame.
    movl 8(%rsp), %ecx                 // pass referrer
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    movq %rsp, %r9                     // pass SP
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
738
MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
    // Return to the managed caller when rax (the result) is non-zero;
    // a zero result means the callee left a pending exception to deliver.
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
746
MACRO0(RETURN_IF_EAX_ZERO)
    // Return to the managed caller when eax is zero (success);
    // a non-zero result means an exception is pending and must be delivered.
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
754
MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    // Return unless Thread::Current()->exception_ is set, in which case deliver it.
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
763
// Generate the allocation entrypoints for each allocator.
// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't as we need to use concatenation
// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
// to macros and the VAR macro won't concatenate arguments properly), this also breaks having
// multi-line macros that use each other (hence using 1 macro per newline below).
//
// Each expansion produces one downcall stub named after the allocator suffix;
// all of them return the allocated object in rax, with null meaning a pending
// exception was stored (hence RETURN_IF_RESULT_IS_NON_ZERO).
//
// Object allocation: two-argument downcalls.
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
// Array allocation: three-argument downcalls.
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
787
// Instantiate the full set of allocation entrypoints once per allocator, in
// both plain and instrumented (profiling/debugging) variants.

// DlMalloc allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)

// DlMalloc allocator, instrumented variant.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)

// RosAlloc allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)

// RosAlloc allocator, instrumented variant.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)

// Bump-pointer allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)

// Bump-pointer allocator, instrumented variant.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)

// Thread-local allocation buffer (TLAB) allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)

// TLAB allocator, instrumented variant.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
867
// Resolution/initialization downcalls: each returns the resolved object in
// rax, with null meaning a pending exception was stored.
TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO

// fill-array-data: eax == 0 on success, otherwise deliver the pending exception.
TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
874
DEFINE_FUNCTION art_quick_lock_object
    /*
     * Thin-lock fast path for monitor-enter. rdi/edi = object (may be null).
     * As used here, the lock word's two high bits flag a non-thin state, the
     * low 16 bits hold the owner thread id, and bits 16+ hold the recursion
     * count -- NOTE(review): confirm against the runtime's LockWord definition.
     * Falls back to artLockObjectFromCode on null, contention, inflation, or
     * recursion-count overflow.
     * NOTE(review): 32-bit (%edi) addressing below assumes heap references fit
     * in 32 bits -- verify against the heap configuration.
     */
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock
.Lretry_lock:
    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word.
    test LITERAL(0xC0000000), %ecx        // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - %edx holds thread id with count of 0
    xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
    lock cmpxchg  %edx, LOCK_WORD_OFFSET(%edi)
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lalready_thin:
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    addl LITERAL(65536), %ecx             // increment recursion count
    test LITERAL(0xC0000000), %ecx        // overflowed if either of top two bits are set
    jne  .Lslow_lock                      // count overflowed so go slow
    movl %ecx, LOCK_WORD_OFFSET(%edi)     // update lockword, cmpxchg not necessary as we hold lock
    ret
.Lslow_lock:
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    movq %rsp, %rdx                       // pass SP
    call PLT_SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object
906
DEFINE_FUNCTION art_quick_unlock_object
    /*
     * Thin-lock fast path for monitor-exit. rdi/edi = object (may be null).
     * Falls back to artUnlockObjectFromCode on null, inflated locks (high bits
     * set), or owner mismatch. A recursion count >= 1 (lock word >= 65536) is
     * decremented; otherwise the lock word is cleared to release the lock.
     * NOTE(review): 32-bit (%edi) addressing assumes heap references fit in
     * 32 bits -- verify against the heap configuration.
     */
    testl %edi, %edi                      // null check object/edi
    jz   .Lslow_unlock
    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test LITERAL(0xC0000000), %ecx
    jnz  .Lslow_unlock                    // lock word contains a monitor
    cmpw %cx, %dx                         // does the thread id match?
    jne  .Lslow_unlock
    cmpl LITERAL(65536), %ecx
    jae  .Lrecursive_thin_unlock
    movl LITERAL(0), LOCK_WORD_OFFSET(%edi)
    ret
.Lrecursive_thin_unlock:
    subl LITERAL(65536), %ecx             // decrement recursion count
    mov  %ecx, LOCK_WORD_OFFSET(%edi)
    ret
.Lslow_unlock:
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    movq %rsp, %rdx                       // pass SP
    call PLT_SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object
932
DEFINE_FUNCTION art_quick_check_cast
    /*
     * check-cast: rdi = klass, rsi = ref's klass. Returns normally when
     * assignable; otherwise throws ClassCastException via long jump (the call
     * below does not return -- int3 guards the unreachable fall-through).
     */
    PUSH rdi                          // Save args for exc
    PUSH rsi
    SETUP_FP_CALLEE_SAVE_FRAME
    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
    testq %rax, %rax
    jz 1f                             // jump forward if not assignable
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Success: discard the saved args without reloading them (values dead).
    addq LITERAL(16), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-16)

    ret
1:
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP rsi                           // Pop arguments
    POP rdi
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    mov %rsp, %rcx                    // pass SP
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call PLT_SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
    int3                              // unreached
END_FUNCTION art_quick_check_cast
955
956
957    /*
958     * Entry from managed code for array put operations of objects where the value being stored
959     * needs to be checked for compatibility.
960     *
961     * Currently all the parameters should fit into the 32b portions of the registers. Index always
962     * will. So we optimize for a tighter encoding. The 64b versions are in comments.
963     *
964     * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
965     */
DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
    /*
     * aput-object with a null check on the array; tail-jumps to the bound-check
     * variant when non-null, or to the NPE throw entrypoint when null.
     * The *_local targets avoid going through the PLT -- TODO(review): confirm
     * they are the non-exported aliases emitted by DEFINE_FUNCTION.
     */
#if defined(__APPLE__)
    int3
    int3
#else
    testl %edi, %edi
//  testq %rdi, %rdi
    jnz art_quick_aput_obj_with_bound_check_local
    jmp art_quick_throw_null_pointer_exception_local
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
977
978
DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
    /*
     * aput-object with an index bounds check. In bounds: tail-jump to
     * art_quick_aput_obj. Out of bounds: move (index, length) into the first
     * two arg registers and tail-jump to the array-bounds throw entrypoint.
     */
#if defined(__APPLE__)
    int3
    int3
#else
    movl ARRAY_LENGTH_OFFSET(%edi), %ecx
//  movl ARRAY_LENGTH_OFFSET(%rdi), %ecx      // This zero-extends, so value(%rcx)=value(%ecx)
    cmpl %ecx, %esi
    jb art_quick_aput_obj_local               // unsigned compare also rejects negative indexes
    mov %esi, %edi                            // arg0 = offending index
//  mov %rsi, %rdi
    mov %ecx, %esi                            // arg1 = array length
//  mov %rcx, %rsi
    jmp art_quick_throw_array_bounds_local
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_bound_check
995
996
DEFINE_FUNCTION art_quick_aput_obj
    /*
     * Store a reference into an object array (null/bounds checks done by the
     * callers above). rdi(edi) = array, rsi(esi) = index, rdx(edx) = value.
     * Fast path: value's class equals the array's component type. Slow path:
     * ask artIsAssignableFromCode; throw ArrayStoreException on failure.
     * Non-null stores dirty the GC card for the array (write barrier).
     */
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    movl CLASS_OFFSET(%edi), %ecx
//  movq CLASS_OFFSET(%rdi), %rcx
    movl CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
//  movq CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
    cmpl CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq CLASS_OFFSET(%rdx), %rcx
    jne .Lcheck_assignability
.Ldo_aput:
    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    // Write barrier: mark the card for the array (card index = address >> 7).
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi
//  shrl LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    // Null stores need no assignability check and no card mark.
    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    subq LITERAL(8), %rsp        // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME

                                  // "Uncompress" = do nothing, as already zero-extended on load.
    movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
    movq %rcx, %rdi               // Pass arg1 = array's component type.

    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz   .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP  rdx
    POP  rsi
    POP  rdi

    // Assignable: redo the store plus the card mark (same as .Ldo_aput).
    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi
//  shrl LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
//  movb %dl, (%rdx, %rdi)
    ret
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rsp, %rcx                         // Pass arg 4 = SP.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.

    call PLT_SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
    int3                          // unreached
END_FUNCTION art_quick_aput_obj
1075
1076// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    // Plain forward to libc memcpy: the quick args (rdi, rsi, rdx) already
    // match the SysV C convention, so no marshalling is needed.
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy
1081
// Suspend check: downcall into artTestSuspendFromCode, then a plain ret.
NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret

// 64-bit arithmetic helpers: not implemented for x86-64.
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr
1090
// Instance field setters: eax == 0 on success, else deliver pending exception.
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO

// Instance field getters: return the value, or deliver if an exception is pending.
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

// Static field setters (set64_static is hand-written below due to arg order).
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO

// Static field getters.
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1105
1106// This is singled out as the argument order is different.
DEFINE_FUNCTION art_quick_set64_static
    /*
     * 64-bit static field setter, hand-written because new_val arrives in rsi
     * and must move to rdx so the referrer can occupy the rsi argument slot.
     */
    movq %rsi, %rdx                    // pass new_val
    // Read referrer from the caller's frame BEFORE the callee-save frame is built.
    movl 8(%rsp), %esi                 // pass referrer
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // field_idx is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    movq %rsp, %r8                     // pass SP
    call PLT_SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    RETURN_IF_EAX_ZERO                 // return or deliver exception
END_FUNCTION art_quick_set64_static
1118
1119
DEFINE_FUNCTION art_quick_proxy_invoke_handler
    /*
     * Entry for invocations of proxy methods. Builds a refs-and-args style
     * frame by hand (layout must agree with the core spills bitmap so the GC
     * can find the references), stores the proxy ArtMethod* at the bottom, and
     * delegates to artQuickProxyInvokeHandler.
     * rdi = proxy method, rsi = receiver; result (GPR or FP) is returned in
     * both rax and xmm0 since the return type is not known here.
     */
    // Save callee and GPR args, mixed together to agree with core spills bitmap of ref. and args
    // callee save frame.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
    subq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
    // Save FPRs (arg regs xmm0-7 plus callee-save xmm12-15).
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store proxy method to bottom of stack.
    movq %rdi, 0(%rsp)
    movq %gs:THREAD_SELF_OFFSET, %rdx  // Pass Thread::Current().
    movq %rsp, %rcx                    // Pass SP.
    call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    movq %rax, %xmm0                   // Copy return value in case of float returns.
    // 168 = 11 GPR pushes (88) + 80 (FPR args/padding/method slot).
    addq LITERAL(168 + 4*8), %rsp            // Pop arguments.
    CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
1160
1161    /*
1162     * Called to resolve an imt conflict.
1163     * rax is a hidden argument that holds the target method's dex method index.
1164     */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
    /*
     * Resolves an IMT conflict: looks up the target method in the caller's
     * dex cache using the hidden dex method index in rax, then tail-jumps to
     * the interface invocation trampoline with rdi = target method.
     * The 32-bit movl loads zero-extend into rdi (references are 32-bit here).
     */
#if defined(__APPLE__)
    int3
    int3
#else
    movl 8(%rsp), %edi            // load caller Method*
    movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
    movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
    jmp art_quick_invoke_interface_trampoline_local
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
1176
DEFINE_FUNCTION art_quick_resolution_trampoline
    /*
     * Lazy method resolution/linking. artQuickResolutionTrampoline returns the
     * resolved code pointer (null on error) and stores the resolved method at
     * the bottom of the frame; we reload it into rdi and tail-call the code.
     */
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call PLT_SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
    testq %r10, %r10              // If code pointer is NULL goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
1191
1192/* Generic JNI frame layout:
1193 *
1194 * #-------------------#
1195 * |                   |
1196 * | caller method...  |
1197 * #-------------------#    <--- SP on entry
1198 *
1199 *          |
1200 *          V
1201 *
1202 * #-------------------#
1203 * | caller method...  |
1204 * #-------------------#
1205 * | Return            |
1206 * | R15               |    callee save
1207 * | R14               |    callee save
1208 * | R13               |    callee save
1209 * | R12               |    callee save
1210 * | R9                |    arg5
1211 * | R8                |    arg4
1212 * | RSI/R6            |    arg1
1213 * | RBP/R5            |    callee save
1214 * | RBX/R3            |    callee save
1215 * | RDX/R2            |    arg2
1216 * | RCX/R1            |    arg3
1217 * | XMM7              |    float arg 8
1218 * | XMM6              |    float arg 7
1219 * | XMM5              |    float arg 6
1220 * | XMM4              |    float arg 5
1221 * | XMM3              |    float arg 4
1222 * | XMM2              |    float arg 3
1223 * | XMM1              |    float arg 2
1224 * | XMM0              |    float arg 1
1225 * | Padding           |
1226 * | RDI/Method*       |  <- sp
1227 * #-------------------#
1228 * | Scratch Alloca    |    5K scratch space
1229 * #---------#---------#
1230 * |         | sp*     |
1231 * | Tramp.  #---------#
1232 * | args    | thread  |
1233 * | Tramp.  #---------#
1234 * |         | method  |
1235 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
1236 *
1237 *           |
1238 *           v              artQuickGenericJniTrampoline
1239 *
1240 * #-------------------#
1241 * | caller method...  |
1242 * #-------------------#
1243 * | Return            |
1244 * | Callee-Save Data  |
1245 * #-------------------#
1246 * | handle scope      |
1247 * #-------------------#
1248 * | Method*           |    <--- (1)
1249 * #-------------------#
1250 * | local ref cookie  | // 4B
1251 * | handle scope size | // 4B   TODO: roll into call stack alignment?
1252 * #-------------------#
1253 * | JNI Call Stack    |
1254 * #-------------------#    <--- SP on native call
1255 * |                   |
1256 * | Stack for Regs    |    The trampoline assembly will pop these values
1257 * |                   |    into registers for native call
1258 * #-------------------#
1259 * | Native code ptr   |
1260 * #-------------------#
1261 * | Free scratch      |
1262 * #-------------------#
1263 * | Ptr to (1)        |    <--- RSP
1264 * #-------------------#
1265 */
1266    /*
1267     * Called to do a generic JNI down-call
1268     */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    /*
     * Generic JNI down-call (see the frame-layout diagram above).
     * rdi = native ArtMethod*; remaining quick arg registers hold the call's
     * arguments. Builds the refs-and-args style frame by hand, allocates 5K of
     * scratch via the stack, lets artQuickGenericJniTrampoline marshal the
     * native call stack, performs the native call, then unwinds via
     * artQuickGenericJniEndTrampoline. rbp anchors the callee-save frame so
     * rsp can move freely through the alloca region.
     */
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
    subq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
    // Save FPRs (arg regs xmm0-7 plus callee-save xmm12-15).
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    movq %rdi, 0(%rsp)              // Store native ArtMethod* to bottom of stack.
    movq %rsp, %rbp                 // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)
    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-2-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    //
    // New test: use 5K and release
    // 5k = 5120
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*,  SP)
    //    rdi    rsi      <= C calling convention
    //  gs:...   rbp      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rbp, %rsi
    call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    test %rax, %rax
    jz .Lentry_error

    // Release part of the alloca.
    movq %rdx, %rsp

    // pop from the register-passing alloca region
    // what's the right layout?
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    addq LITERAL(64), %rsp          // floating-point done

    // native call
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi   rdx       <= C calling convention
    //  gs:...    rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call PLT_SYMBOL(artQuickGenericJniEndTrampoline)

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the callee-save frame.
    // Load FPRs. Note: xmm0 holds the return value and must not be reloaded;
    // xmm1-7 are caller-save so reloading them is unnecessary but harmless.
    // movq %xmm0, 16(%rsp)         // doesn't make sense!!!
    movq 24(%rsp), %xmm1            // neither does this!!!
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // was 80 bytes
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.
    // store into fpr, for when it's a fpr return...
    movq %rax, %xmm0
    ret
.Lentry_error:
    // Setup failed: restore rsp to the callee-save frame and fall through to
    // the common exception-delivery teardown below.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)
.Lexception_in_native:
    // TODO: the handle scope contains the this pointer which is used by the debugger for exception
    //       delivery.
    // Restore FPRs; only the callee-save xmm12-15 actually matter here.
    // movq %xmm0, 16(%rsp)         // Removed: dead store of caller-save xmm0
    //                              // into its own save slot (flagged
    //                              // "doesn't make sense" in the normal path).
    movq 24(%rsp), %xmm1            // caller-save; reload unnecessary but harmless
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // was 80 + 32 bytes
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.

    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
1449
1450    /*
1451     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1452     * of a quick call:
1453     * RDI = method being called / to bridge to.
1454     * RSI, RDX, RCX, R8, R9 are arguments to that method.
1455     */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    // On entry RDI already holds the ArtMethod* (first managed-ABI arg), so it is
    // passed through unchanged as the first C argument below.
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
    movq %rsp, %rdx                        // RDX := sp
    call PLT_SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
    // Copy RAX into XMM0 as well so the value is correct regardless of whether the
    // caller expects the result in the GPR or the FPR return register.
    movq %rax, %xmm0                   // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge
1465
1466    /*
1467     * Routine that intercepts method calls and returns.
1468     */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    // Not implemented on Mach-O targets (no %gs:THREAD_SELF_OFFSET); trap if reached.
    int3
    int3
#else
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME

    // R12 is safe to use as scratch here: the frame set up above spilled it and
    // RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME below reloads it before the tail call.
    movq %rdi, %r12               // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
    movq %rsp, %rcx                     // Pass SP.
    // The caller's return PC sits in the top slot of the callee-save frame.
    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %r8   // Pass return PC.

    call PLT_SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)

                                  // %rax = result of call.
    movq %r12, %rdi               // Reload method pointer.

    // Overwrite the saved return PC so the callee returns into
    // art_quick_instrumentation_exit instead of the original caller.
    leaq art_quick_instrumentation_exit_local(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.

    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME

    jmp *%rax                     // Tail call to intended method.
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry
1495
DEFINE_FUNCTION art_quick_instrumentation_exit
    // Reached via the return PC patched in by art_quick_instrumentation_entry, so
    // there is no real return address on the stack; push a placeholder so the
    // callee-save frame below has its expected shape.
    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
    CFI_ADJUST_CFA_OFFSET(8)  // Keep unwind info in sync with the fake return PC slot.

    SETUP_REF_ONLY_CALLEE_SAVE_FRAME

    // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
    // we would need to fully restore it. As there are a good number of callee-save registers, it
    // seems easier to have an extra small stack area. But this should be revisited.

    movq  %rsp, %rsi                          // Pass SP (of the callee-save frame, captured
                                              // before the rax/xmm0 spill area below).

    PUSH rax                  // Save integer result.
    subq LITERAL(8), %rsp     // Save floating-point result.
    CFI_ADJUST_CFA_OFFSET(8)
    movq %xmm0, (%rsp)

    movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
    movq  %rax, %rdx                          // Pass integer result.
    movq  %xmm0, %rcx                         // Pass floating-point result.

    call PLT_SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)

    movq  %rax, %rdi          // Store return PC
    movq  %rdx, %rsi          // Store second return PC in hidden arg.

    movq (%rsp), %xmm0        // Restore floating-point result.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rax                   // Restore integer result.

    // No callee-save registers were clobbered above, so the frame (plus the fake
    // return PC it includes) can simply be dropped rather than restored.
    addq LITERAL(FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %rsp   // Drop save frame and fake return pc.
    CFI_ADJUST_CFA_OFFSET(- FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)

    jmp   *%rdi               // Return.
END_FUNCTION art_quick_instrumentation_exit
1530
1531    /*
1532     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
1533     * will long jump to the upcall with a special exception of -1.
1534     */
DEFINE_FUNCTION art_quick_deoptimize
    // Push the hidden arg to simulate a return-address slot; without a matching
    // CFI adjustment the unwind info is wrong for the rest of the function, and
    // artDeoptimize relies on unwinding this frame.
    pushq %rsi                     // Fake that we were called. Use hidden arg.
    CFI_ADJUST_CFA_OFFSET(8)       // Keep unwind info in sync with the extra slot.
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                   // Stack should be aligned now.
    movq %rsp, %rsi                           // Pass SP.
    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
    call PLT_SYMBOL(artDeoptimize) // artDeoptimize(Thread*, SP)
    int3                           // Unreachable: artDeoptimize long-jumps out.
END_FUNCTION art_quick_deoptimize
1544
1545
1546    /*
1547     * String's compareTo.
1548     *
1549     * On entry:
1550     *    rdi:   this string object (known non-null)
1551     *    rsi:   comp string object (known non-null)
1552     */
DEFINE_FUNCTION art_quick_string_compareto
    // NOTE(review): the 32-bit address forms below (%edi/%esi bases) truncate the
    // object pointers to 32 bits — this assumes heap references fit in the low
    // 4GB of the address space; confirm against the runtime's heap layout.
    movl STRING_COUNT_OFFSET(%edi), %r8d
    movl STRING_COUNT_OFFSET(%esi), %r9d
    movl STRING_VALUE_OFFSET(%edi), %r10d
    movl STRING_VALUE_OFFSET(%esi), %r11d
    movl STRING_OFFSET_OFFSET(%edi), %eax
    movl STRING_OFFSET_OFFSET(%esi), %ecx
    /* Build pointers to the start of string data */
    // Scale by 2: string data is UTF-16 (2 bytes per char).
    leal STRING_DATA_OFFSET(%r10d, %eax, 2), %esi
    leal STRING_DATA_OFFSET(%r11d, %ecx, 2), %edi
    /* Calculate min length and count diff */
    movl  %r8d, %ecx
    movl  %r8d, %eax
    subl  %r9d, %eax              // eax = this.count - comp.count
    cmovg %r9d, %ecx              // if this is longer, min length is comp's
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to this string data
     *   edi: pointer to comp string data
     */
    jecxz .Lkeep_length           // min length 0: result is the length difference
    repe cmpsw                    // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    jne .Lnot_equal
.Lkeep_length:
    ret                           // common prefix equal: return length difference in eax
    .balign 16
.Lnot_equal:
    // repe cmpsw leaves esi/edi one char past the mismatch, hence the -2 offsets.
    movzwl  -2(%esi), %eax        // get last compared char from this string
    movzwl  -2(%edi), %ecx        // get last compared char from comp string
    subl  %ecx, %eax              // return the difference
    ret
END_FUNCTION art_quick_string_compareto
1587
// 16-bit memcmp entrypoint is stubbed out (presumably traps if called) — TODO: implement.
UNIMPLEMENTED art_quick_memcmp16
1589
DEFINE_FUNCTION art_quick_assignable_from_code
    // Thin wrapper: rdi/rsi already hold the two Class* args for the C call.
    // The FP frame preserves xmm12-xmm15, which managed code treats as
    // callee-save but the C ABI treats as volatile.
    SETUP_FP_CALLEE_SAVE_FRAME
    call PLT_SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_assignable_from_code
1596