/* quick_entrypoints_x86_64.S revision fea29016a79f39ac12a4ba4ebdcbc86800c03427 */
1/*
2 * Copyright (C) 2012 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "asm_support_x86_64.S"
18
// Spill the ART FP callee-save registers (xmm12-xmm15) into a fresh 32-byte
// stack area. ART treats xmm12-xmm15 as callee-save even though the native
// SysV AMD64 ABI makes all xmm registers caller-save.
// Must be paired with RESTORE_FP_CALLEE_SAVE_FRAME.
MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO
28
// Reload xmm12-xmm15 from the area created by SETUP_FP_CALLEE_SAVE_FRAME and
// release the 32 bytes. Offsets mirror the setup macro exactly.
MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO
38
// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.
40
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAll).
     * Layout (low to high): ArtMethod* slot, xmm12-15, rbx, rbp, r12-r15,
     * return address. Clobbers r10.
     */
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3   // Not implemented for Apple targets; trap if ever reached.
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus padding for alignment
    subq LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    // Save FPRs (xmm12-xmm15 are ART's FP callee-save registers).
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
    subq MACRO_LITERAL(8), %rsp  // Space for Method* (also aligns the frame).
    CFI_ADJUST_CFA_OFFSET(8)
    // R10 := ArtMethod* for save all callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
83
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
     * Same GPR spills as kSaveAll; the ArtMethod* slot shares the 8 + 4*8
     * reservation with the FPR spills (method* at 0, xmm12-15 at 8..32).
     * Clobbers r10.
     */
MACRO0(SETUP_REF_ONLY_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3   // Not implemented for Apple targets; trap if ever reached.
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus padding for alignment
    subq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4*8)
    // Save FPRs (offset 0 is reserved for the ArtMethod* stored below).
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6*8 + 4*8 + 8 + 8)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
124
// Tear down the frame built by SETUP_REF_ONLY_CALLEE_SAVE_FRAME: reload the
// spilled FPRs and GPRs in exact reverse order and drop the method*/FPR area.
MACRO0(RESTORE_REF_ONLY_CALLEE_SAVE_FRAME)
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
140
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
     * Spills callee-saves AND the managed-ABI argument registers
     * (GPR args rsi/rdx/rcx/r8/r9 and FPR args xmm0-xmm7) so a runtime call
     * can be made while the incoming arguments remain recoverable.
     * Clobbers r10.
     */
MACRO0(SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3   // Not implemented for Apple targets; trap if ever reached.
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
    subq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Save FPRs: arg registers xmm0-7 at 16..72, callee-saves xmm12-15 at 80..104.
    // Slot 0 is the ArtMethod*, slot 8 is padding.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11*8 + 4*8 + 80 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
194
// Tear down the frame built by SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME,
// reloading the argument and callee-save registers in exact reverse order.
MACRO0(RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME)
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
224
225
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the
     * pending exception is Thread::Current()->exception_. Does not return.
     */
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
    // (Thread*, SP) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rsp, %rsi
    call PLT_SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*, SP)
    UNREACHABLE
END_MACRO
238
// Define a throw stub c_name that takes no managed arguments and tail-calls
// cxx_name(Thread*, SP). The C++ helper raises the exception and long-jumps,
// so control never returns here.
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %rsp, %rsi                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)     // cxx_name(Thread*, SP)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
249
// Define a throw stub c_name taking one managed argument (already in rdi) and
// calling cxx_name(arg1, Thread*, SP). Never returns.
MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %rsp, %rdx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, Thread*, SP)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
260
// Define a throw stub c_name taking two managed arguments (already in rdi/rsi)
// and calling cxx_name(arg1, arg2, Thread*, SP). Never returns.
MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %rsp, %rcx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*, SP)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
271
    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NoSuchMethodError.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
303
    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
     * stack and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return NULL/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    // Caller's Method* sits just above this frame; 32-bit load since it is a
    // StackReference (see the STACK_REFERENCE_SIZE check elsewhere in this file).
    movl FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %edx  // pass caller Method*
    movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
    movq %rsp, %r8                                         // pass SP

    call PLT_VAR(cxx_name, 1)                   // cxx_name(arg1, arg2, caller method*, Thread*, SP)
                                                           // save the code pointer
    // Shuffle the 128-bit result: rdi := target Method* (rax), rax := code_ (rdx).
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME

    testq %rdi, %rdi              // Method* == NULL means lookup failed.
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
    END_FUNCTION VAR(c_name, 0)
END_MACRO
347
// Instantiate the invoke trampolines for each invoke kind.
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
355
356
    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     * Loads at most one FP value (into xmm_reg); any non-FP shorty characters
     * seen along the way are skipped (advancing arg_array appropriately).
     * Shorty characters are compared as ASCII: 'D' == 68, 'F' == 70, 'J' == 74.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished, 1)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg, 0)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg, 0)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
387
    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     * Loads at most one integer value: a long ('J') into gpr_reg64 or a 32-bit
     * value into gpr_reg32; FP shorty characters ('F'/'D') are skipped, only
     * advancing arg_array past their slots.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished, 2)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32, 1)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64, 0)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
420
    /*
     * Quick invocation stub (instance methods: arg_array starts with "this").
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     * Copies the arg array onto the stack, loads argument registers per the
     * shorty, calls the method's quick code, then stores the result into *r8
     * based on the shorty's return type character.
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3   // Not implemented for Apple targets; trap if ever reached.
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // Stash arg-array size; edx is about to be reused.
    addl LITERAL(60), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                  // r8 and r9 in frame.
    andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
    subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
    subq %rdx, %rsp               // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movl LITERAL(0), (%rsp)       // Store NULL for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // RAX := method to be called
    movq %rsi, %r11               // R11 := arg_array
    leaq 4(%rsp), %rdi            // Rdi is pointing just above the StackReference<method> in the
                                  // stack arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // RDI := method to be called
    movl (%r11), %esi             // RSI := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    CFI_DEF_CFA_REGISTER(rsp)
    POP r9                        // Pop r9 - shorty*.
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)           // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)           // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
504
    /*
     * Quick invocation stub for static methods (no implicit "this" argument,
     * so rsi may be loaded from the shorty like any other GPR argument).
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or NULL if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     * Mirrors art_quick_invoke_stub except there is no this-pointer handling.
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3   // Not implemented for Apple targets; trap if ever reached.
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // Stash arg-array size; edx is about to be reused.
    addl LITERAL(60), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                  // r8 and r9 in frame.
    andl LITERAL(0xFFFFFFF0), %edx    // Align frame size to 16 bytes.
    subl LITERAL(32), %edx        // Remove space for return address, rbp, r8 and r9.
    subq %rdx, %rsp               // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movl LITERAL(0), (%rsp)       // Store NULL for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // RAX := method to be called
    movq %rsi, %r11               // R11 := arg_array
    leaq 4(%rsp), %rdi            // Rdi is pointing just above the StackReference<method> in the
                                  // stack arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // RDI := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *METHOD_QUICK_CODE_OFFSET(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    CFI_DEF_CFA_REGISTER(rsp)
    POP r9                        // Pop r9 - shorty*.
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)           // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)           // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
587
    /*
     * Long jump stub: restore a full machine context and resume execution.
     * On entry:
     *   rdi = gprs  (array of saved general-purpose register values)
     *   rsi = fprs  (array of 16 saved xmm values, 8 bytes each)
     * Does not return to its caller; control transfers to the saved RIP.
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3   // Not implemented for Apple targets; trap if ever reached.
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs by popping them from the gprs array.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
639
// Define a stub c_name that calls cxx_name(Thread*, SP) with refs-only callee
// saves spilled, then dispatches on the result via return_macro.
MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save ref containing registers for GC
    // Outgoing argument set up
    movq %rsp, %rsi                   // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdi // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)         // cxx_name(Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)       // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
651
// Define a stub c_name taking one argument (in rdi) that calls
// cxx_name(arg0, Thread*, SP) and dispatches on the result via return_macro.
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up
    movq %rsp, %rdx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
663
// Define a stub c_name taking two arguments (in rdi/rsi) that calls
// cxx_name(arg0, arg1, Thread*, SP) and dispatches via return_macro.
MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up
    movq %rsp, %rcx                    // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)       // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
675
// Define a stub c_name taking three arguments (in rdi/rsi/rdx) that calls
// cxx_name(arg0, arg1, arg2, Thread*, SP) and dispatches via return_macro.
MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up
    movq %rsp, %r8                     // pass SP
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
687
// Like ONE_ARG_DOWNCALL but additionally passes the caller's referrer Method*
// (read from the stack BEFORE the frame is built, hence the load at offset 8
// above the return address) as the second argument.
MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    movl 8(%rsp), %esi                 // pass referrer
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    movq %rsp, %rcx                    // pass SP
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
700
// Like TWO_ARG_DOWNCALL but additionally passes the caller's referrer Method*
// (read from the stack before the frame is built) as the third argument.
MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    movl 8(%rsp), %edx                 // pass referrer
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    movq %rsp, %r8                     // pass SP
    call PLT_VAR(cxx_name, 1)          // (arg0, arg1, referrer, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
713
// Like THREE_ARG_DOWNCALL but additionally passes the caller's referrer
// Method* (read from the stack before the frame is built) as the fourth argument.
MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    movl 8(%rsp), %ecx                 // pass referrer
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    movq %rsp, %r9                     // pass SP
    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)        // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
726
// Return-dispatch macro: return normally when rax (the 64-bit result, e.g. an
// allocated object pointer) is non-zero; otherwise deliver the pending exception.
MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
734
// Return-dispatch macro: return normally when eax (a 32-bit status result) is
// zero; otherwise deliver the pending exception.
MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
742
// Return-dispatch macro: return normally if the current thread has no pending
// exception (Thread::Current()->exception_ == NULL); otherwise deliver it.
// Clobbers rcx.
MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
751
// Generate the allocation entrypoints for each allocator.
// TODO: use arch/quick_alloc_entrypoints.S. Currently we don't as we need to use concatenation
// macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
// to macros and the VAR macro won't concatenate arguments properly), this also breaks having
// multi-line macros that use each other (hence using 1 macro per newline below).
//
// Each generator emits one art_quick_* stub (suffixed with the allocator name)
// that forwards its arguments to the matching artXxxFromCode C++ routine with
// the given cxx_suffix, and returns the allocated object via
// RETURN_IF_RESULT_IS_NON_ZERO (null result => deliver pending exception).
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
775
// Instantiate the full set of allocation entrypoints once per allocator
// (plain and instrumented variants of each).

// dlmalloc allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)

// dlmalloc allocator, instrumented.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)

// rosalloc allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)

// rosalloc allocator, instrumented.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)

// bump-pointer allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)

// bump-pointer allocator, instrumented.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)

// thread-local allocation buffer (TLAB) allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)

// TLAB allocator, instrumented.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
855
// Class/string resolution and static-storage initialization entrypoints; each
// returns the resolved object/class, delivering the pending exception on null.
TWO_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
TWO_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
TWO_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
TWO_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO

// fill-array-data; C++ side reports success as 0 in eax.
TWO_ARG_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
862
DEFINE_FUNCTION art_quick_lock_object
    /*
     * Fast path for monitor-enter. In: rdi/edi = object (may be null).
     * Thin-lock word layout as used below: the two high bits flag a non-thin
     * state (slow path), the low 16 bits hold the owner thread id, and the
     * recursion count sits above that (incremented in units of 65536).
     * NOTE(review): memory operands use %edi (32-bit address), which assumes
     * heap objects live in the low 4GB -- confirm against the heap layout.
     */
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock                      // null object: slow path raises the exception
.Lretry_lock:
    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word.
    test LITERAL(0xC0000000), %ecx        // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - %edx holds thread id with count of 0
    xor  %eax, %eax                       // eax == 0 for comparison with lock word in cmpxchg
    lock cmpxchg  %edx, LOCK_WORD_OFFSET(%edi)
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lalready_thin:
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    addl LITERAL(65536), %ecx             // increment recursion count
    test LITERAL(0xC0000000), %ecx        // overflowed if either of top two bits are set
    jne  .Lslow_lock                      // count overflowed so go slow
    movl %ecx, LOCK_WORD_OFFSET(%edi)     // update lockword, cmpxchg not necessary as we hold lock
    ret
.Lslow_lock:
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME      // transition to runtime; object stays in rdi (arg 1)
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    movq %rsp, %rdx                       // pass SP
    call PLT_SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object
894
DEFINE_FUNCTION art_quick_unlock_object
    /*
     * Fast path for monitor-exit. In: rdi/edi = object (may be null).
     * Mirrors art_quick_lock_object: a thin lock owned by this thread is
     * either cleared (count 0) or its recursion count is decremented; fat
     * locks, null objects and wrong-owner unlocks go to the slow path.
     * The plain stores are not cmpxchg'd: only the owning thread mutates
     * an owned thin lock word here.
     */
    testl %edi, %edi                      // null check object/edi
    jz   .Lslow_unlock
    movl LOCK_WORD_OFFSET(%edi), %ecx     // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test LITERAL(0xC0000000), %ecx
    jnz  .Lslow_unlock                    // lock word contains a monitor
    cmpw %cx, %dx                         // does the thread id match?
    jne  .Lslow_unlock
    cmpl LITERAL(65536), %ecx             // any recursion count above the thread id?
    jae  .Lrecursive_thin_unlock
    movl LITERAL(0), LOCK_WORD_OFFSET(%edi)  // count 0: release by clearing the word
    ret
.Lrecursive_thin_unlock:
    subl LITERAL(65536), %ecx             // decrement recursion count
    mov  %ecx, LOCK_WORD_OFFSET(%edi)
    ret
.Lslow_unlock:
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME      // transition to runtime; object stays in rdi (arg 1)
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    movq %rsp, %rdx                       // pass SP
    call PLT_SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME    // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object
920
DEFINE_FUNCTION art_quick_check_cast
    /*
     * Type check used by check-cast. In: rdi = Class* a, rsi = Class* b.
     * Returns normally when artIsAssignableFromCode reports assignable
     * (non-zero rax); otherwise throws ClassCastException via long jump.
     */
    PUSH rdi                          // Save args for exc
    PUSH rsi
    SETUP_FP_CALLEE_SAVE_FRAME        // xmm12-15: ART callee-save but volatile in the C ABI
    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
    testq %rax, %rax
    jz 1f                             // jump forward if not assignable
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(16), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-16)

    ret
1:
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP rsi                           // Pop arguments back into the C arg registers
    POP rdi
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    mov %rsp, %rcx                    // pass SP
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call PLT_SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*, SP)
    int3                              // unreached
END_FUNCTION art_quick_check_cast
943
944
945    /*
946     * Entry from managed code for array put operations of objects where the value being stored
947     * needs to be checked for compatibility.
948     *
949     * Currently all the parameters should fit into the 32b portions of the registers. Index always
950     * will. So we optimize for a tighter encoding. The 64b versions are in comments.
951     *
952     * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
953     */
DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
    // Checked object-array store, first stage: null-check the array, then fall
    // into the bound-checking variant. A null array tail-jumps to the NPE stub.
#if defined(__APPLE__)
    int3                              // not implemented on Mach-O (no _local aliases)
    int3
#else
    testl %edi, %edi
//  testq %rdi, %rdi
    jnz art_quick_aput_obj_with_bound_check_local
    jmp art_quick_throw_null_pointer_exception_local
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
965
966
DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
    // Checked object-array store, second stage: unsigned index < length check,
    // then fall into the unchecked store. On failure, pass (index, length) to
    // the array-bounds throw stub.
#if defined(__APPLE__)
    int3                              // not implemented on Mach-O (no _local aliases)
    int3
#else
    movl ARRAY_LENGTH_OFFSET(%edi), %ecx
//  movl ARRAY_LENGTH_OFFSET(%rdi), %ecx      // This zero-extends, so value(%rcx)=value(%ecx)
    cmpl %ecx, %esi                   // unsigned compare also rejects negative indices
    jb art_quick_aput_obj_local
    mov %esi, %edi                    // arg1 := offending index
//  mov %rsi, %rdi
    mov %ecx, %esi                    // arg2 := array length
//  mov %rcx, %rsi
    jmp art_quick_throw_array_bounds_local
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_bound_check
983
984
DEFINE_FUNCTION art_quick_aput_obj
    // Unchecked object-array store: edi = array, esi = index, edx = value.
    // Performs the component-type assignability check (fast path: exact class
    // match) and marks the card table after a non-null store.
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    movl CLASS_OFFSET(%edi), %ecx
//  movq CLASS_OFFSET(%rdi), %rcx
    movl CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
//  movq CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
    cmpl CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq CLASS_OFFSET(%rdx), %rcx
    jne .Lcheck_assignability
.Ldo_aput:
    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi           // card index := array address >> 7
//  shrl LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    // Null store: no type check and no write barrier needed.
    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    subq LITERAL(8), %rsp        // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME   // xmm12-15: ART callee-save but volatile in the C ABI

                                  // "Uncompress" = do nothing, as already zero-extended on load.
    movl CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
    movq %rcx, %rdi               // Pass arg1 = array's component type.

    call PLT_SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz   .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP  rdx
    POP  rsi
    POP  rdi

    // Assignable: redo the store and write barrier (same as .Ldo_aput).
    movl %edx, OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi
//  shrl LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
//  movb %dl, (%rdx, %rdi)
    ret
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rsp, %rcx                         // Pass arg 4 = SP.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array (already in rdi).
    call PLT_SYMBOL(artThrowArrayStoreException) // (array, value, Thread*, SP)
    int3                          // unreached
END_FUNCTION art_quick_aput_obj
1063
// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    // Thin shim over libc memcpy: the quick ABI argument registers already
    // match the C ABI (rdi = dst, rsi = src, rdx = size), so just forward.
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy
1069
// Suspend check: just calls into the runtime and returns.
NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret

// 64-bit arithmetic helpers are not needed on x86-64 (native 64-bit ops).
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

// Instance field setters: (field_idx, object, new_val); eax == 0 on success.
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO

// Instance field getters: result in rax/xmm0; failure signalled via the
// thread's pending-exception slot.
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

// Static field setters (64-bit variant is hand-written below).
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO

// Static field getters.
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
1093
1094// This is singled out as the argument order is different.
DEFINE_FUNCTION art_quick_set64_static
    // Hand-written because the C++ routine takes (field_idx, referrer, new_val)
    // while quick code passes (field_idx, new_val): rsi must be shifted to rdx
    // and the referrer fetched from the stack. The referrer is read BEFORE the
    // callee-save frame is set up, while 8(%rsp) is still the caller's slot
    // just above our return address.
    movq %rsi, %rdx                    // pass new_val
    movl 8(%rsp), %esi                 // pass referrer (32-bit load; presumably a
                                       // compressed Method* ref -- confirm)
    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
                                       // field_idx is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    movq %rsp, %r8                     // pass SP
    call PLT_SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*, SP)
    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    RETURN_IF_EAX_ZERO                 // return or deliver exception
END_FUNCTION art_quick_set64_static
1106
1107
DEFINE_FUNCTION art_quick_proxy_invoke_handler
    // Proxy method dispatch: rdi = proxy method, rsi = receiver, remaining
    // quick args in rdx, rcx, r8, r9 / xmm0-7. Builds a ref-and-args
    // callee-save frame by hand, then hands everything to the C++ handler,
    // which unpacks the arguments and invokes the InvocationHandler.
    // Save callee and GPR args, mixed together to agree with core spills bitmap of ref. and args
    // callee save frame.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
    subq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
    // Save FPRs: arg registers xmm0-7 at 16..72, callee-save xmm12-15 at 80..104.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store proxy method to bottom of stack.
    movq %rdi, 0(%rsp)
    movq %gs:THREAD_SELF_OFFSET, %rdx  // Pass Thread::Current().
    movq %rsp, %rcx                    // Pass SP.
    call PLT_SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    movq %rax, %xmm0                   // Copy return value in case of float returns.
    // Pop the whole frame: 11 GPR pushes (88) + FPR/slot area (80 + 4*8) = 168 + 4*8.
    addq LITERAL(168 + 4*8), %rsp            // Pop arguments.
    CFI_ADJUST_CFA_OFFSET(-168 - 4*8)
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
1148
1149    /*
1150     * Called to resolve an imt conflict.
1151     * rax is a hidden argument that holds the target method's dex method index.
1152     */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
    // Resolves an IMT conflict: look the real target up in the caller's
    // dex-cache resolved-methods array by the dex method index passed in rax,
    // then continue through the interface trampoline.
#if defined(__APPLE__)
    int3                          // not implemented on Mach-O (no _local aliases)
    int3
#else
    movl 8(%rsp), %edi            // load caller Method* (32-bit load; presumably a
                                  // compressed reference -- confirm)
    movl METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi  // load dex_cache_resolved_methods
    movl OBJECT_ARRAY_DATA_OFFSET(%rdi, %rax, 4), %edi  // load the target method
    jmp art_quick_invoke_interface_trampoline_local
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
1164
DEFINE_FUNCTION art_quick_resolution_trampoline
    // Lazy method resolution/linking: the C++ side resolves the callee and
    // returns its code pointer (null on error). On success, tail-call the
    // resolved code with the (possibly updated) method reloaded into rdi.
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call PLT_SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME
    testq %r10, %r10              // If code pointer is NULL goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
1179
1180/* Generic JNI frame layout:
1181 *
1182 * #-------------------#
1183 * |                   |
1184 * | caller method...  |
1185 * #-------------------#    <--- SP on entry
1186 *
1187 *          |
1188 *          V
1189 *
1190 * #-------------------#
1191 * | caller method...  |
1192 * #-------------------#
1193 * | Return            |
1194 * | R15               |    callee save
1195 * | R14               |    callee save
1196 * | R13               |    callee save
1197 * | R12               |    callee save
1198 * | R9                |    arg5
1199 * | R8                |    arg4
1200 * | RSI/R6            |    arg1
1201 * | RBP/R5            |    callee save
1202 * | RBX/R3            |    callee save
1203 * | RDX/R2            |    arg2
1204 * | RCX/R1            |    arg3
1205 * | XMM7              |    float arg 8
1206 * | XMM6              |    float arg 7
1207 * | XMM5              |    float arg 6
1208 * | XMM4              |    float arg 5
1209 * | XMM3              |    float arg 4
1210 * | XMM2              |    float arg 3
1211 * | XMM1              |    float arg 2
1212 * | XMM0              |    float arg 1
1213 * | Padding           |
1214 * | RDI/Method*       |  <- sp
1215 * #-------------------#
1216 * | Scratch Alloca    |    5K scratch space
1217 * #---------#---------#
1218 * |         | sp*     |
1219 * | Tramp.  #---------#
1220 * | args    | thread  |
1221 * | Tramp.  #---------#
1222 * |         | method  |
1223 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
1224 *
1225 *           |
1226 *           v              artQuickGenericJniTrampoline
1227 *
1228 * #-------------------#
1229 * | caller method...  |
1230 * #-------------------#
1231 * | Return            |
1232 * | Callee-Save Data  |
1233 * #-------------------#
1234 * | handle scope      |
1235 * #-------------------#
1236 * | Method*           |    <--- (1)
1237 * #-------------------#
1238 * | local ref cookie  | // 4B
1239 * | handle scope size | // 4B   TODO: roll into call stack alignment?
1240 * #-------------------#
1241 * | JNI Call Stack    |
1242 * #-------------------#    <--- SP on native call
1243 * |                   |
1244 * | Stack for Regs    |    The trampoline assembly will pop these values
1245 * |                   |    into registers for native call
1246 * #-------------------#
1247 * | Native code ptr   |
1248 * #-------------------#
1249 * | Free scratch      |
1250 * #-------------------#
1251 * | Ptr to (1)        |    <--- RSP
1252 * #-------------------#
1253 */
1254    /*
1255     * Called to do a generic JNI down-call
1256     */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    // Generic JNI down-call. In: rdi = native ArtMethod*, remaining quick args
    // in rsi, rdx, rcx, r8, r9 / xmm0-7 (see the frame-layout diagram above).
    // Builds a callee-save frame by hand, lets artQuickGenericJniTrampoline
    // construct the handle scope and the native call stack inside a large
    // alloca, performs the native call, then finishes through
    // artQuickGenericJniEndTrampoline.
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots, 1 of padding and 1 for the ArtMethod*.
    subq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4*8)
    // Save FPRs: arg registers xmm0-7 at 16..72, callee-save xmm12-15 at 80..104.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    movq %rdi, 0(%rsp)              // Store native ArtMethod* to bottom of stack.
    movq %rsp, %rbp                 // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)
    //
    // Reserve scratch space for the C helper to build the native frame in.
    // Rough budget:
    //      4    local state ref
    //      4    padding
    //   4096    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-2-register space
    // ------
    //   4232   -> rounded up generously to 5K (5120) below.
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*,  SP)
    //    rdi    rsi      <= C calling convention
    //  gs:...   rbp      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rbp, %rsi
    call PLT_SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    test %rax, %rax
    jz .Lentry_error

    // Release part of the alloca.
    movq %rdx, %rsp

    // Pop the native call's register arguments from the alloca region: the C
    // helper laid out the six integer arg registers followed by eight FP args.
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    addq LITERAL(64), %rsp          // floating-point done

    // native call
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi   rdx       <= C calling convention
    //  gs:...    rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call PLT_SYMBOL(artQuickGenericJniEndTrampoline)

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the callee-save frame.
    // Load FPRs. xmm0 is intentionally NOT reloaded: it is set from the call
    // result (%rax) just before returning, so a reload here would be clobbered
    // anyway. xmm1-7 are caller-save in both ABIs, so reloading them is
    // redundant but harmless; xmm12-15 (ART callee-save) must be restored.
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // was 80 bytes
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.
    // store into fpr, for when it's a fpr return...
    movq %rax, %xmm0
    ret
.Lentry_error:
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)
.Lexception_in_native:
    // TODO: the handle scope contains the this pointer which is used by the debugger for exception
    //       delivery.
    // Restore FPRs before delivering the exception.
    // BUG FIX: the xmm0 line previously STORED xmm0 to 16(%rsp) (operands
    // reversed), overwriting the saved slot instead of reloading the register;
    // make it a load like its siblings.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // was 80 + 32 bytes
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.

    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
1437
1438    /*
1439     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
1440     * of a quick call:
1441     * RDI = method being called / to bridge to.
1442     * RSI, RDX, RCX, R8, R9 are arguments to that method.
1443     */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    // Bridge from quick code into the interpreter; see the comment above for
    // the incoming register contract. The C++ side reads the spilled arguments
    // out of the ref-and-args frame, so no per-register marshalling is needed.
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
    movq %rsp, %rdx                        // RDX := sp
    call PLT_SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                   // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge
1453
1454    /*
1455     * Routine that intercepts method calls and returns.
1456     */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    // gs-relative TLS access below is Linux-style; trap on Mac builds.
    int3
    int3
#else
    SETUP_REF_AND_ARGS_CALLEE_SAVE_FRAME

    // R12 is a callee-save in both the native ABI and the frame just built,
    // so the method pointer survives the C++ call below.
    movq %rdi, %r12               // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
    movq %rsp, %rcx                     // Pass SP.
    // The caller's return PC sits in the top slot of the callee-save frame.
    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %r8   // Pass return PC.

    call PLT_SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP, LR)

                                  // %rax = result of call.
    movq %r12, %rdi               // Reload method pointer.

    // Overwrite the saved return PC so the callee returns through
    // art_quick_instrumentation_exit instead of the real caller.
    leaq art_quick_instrumentation_exit_local(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.

    RESTORE_REF_AND_ARGS_CALLEE_SAVE_FRAME

    // RAX holds the code pointer chosen by the entry hook; tail-call it with
    // the original argument registers restored.
    jmp *%rax                     // Tail call to intended method.
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry
1483
DEFINE_FUNCTION art_quick_instrumentation_exit
    // Reached by the redirected return from an instrumented method, so there
    // is no genuine return address on the stack; push a placeholder to keep
    // the callee-save frame layout consistent.
    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.

    SETUP_REF_ONLY_CALLEE_SAVE_FRAME

    // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
    // we would need to fully restore it. As there are a good number of callee-save registers, it
    // seems easier to have an extra small stack area. But this should be revisited.

    // Capture SP *before* the extra rax/xmm0 save area so the C++ side sees
    // the callee-save frame, not our scratch slots.
    movq  %rsp, %rsi                          // Pass SP.

    PUSH rax                  // Save integer result.
    subq LITERAL(8), %rsp     // Save floating-point result.
    CFI_ADJUST_CFA_OFFSET(8)
    movq %xmm0, (%rsp)

    movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
    movq  %rax, %rdx                          // Pass integer result.
    movq  %xmm0, %rcx                         // Pass floating-point result.

    call PLT_SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)

    // The hook returns a pair: RAX = PC to resume at, RDX = secondary PC.
    // Stash both in scratch registers before restoring the method's results.
    movq  %rax, %rdi          // Store return PC
    movq  %rdx, %rsi          // Store second return PC in hidden arg.

    movq (%rsp), %xmm0        // Restore floating-point result.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rax                   // Restore integer result.

    // FRAME_SIZE_REFS_ONLY_CALLEE_SAVE includes the (fake) return-PC slot,
    // so one add drops the whole frame.
    addq LITERAL(FRAME_SIZE_REFS_ONLY_CALLEE_SAVE), %rsp   // Drop save frame and fake return pc.

    // No real return address exists; jump to the PC the hook selected.
    jmp   *%rdi               // Return.
END_FUNCTION art_quick_instrumentation_exit
1518
1519    /*
1520     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
1521     * will long jump to the upcall with a special exception of -1.
1522     */
DEFINE_FUNCTION art_quick_deoptimize
    // Entered by a jump, not a call: push a dummy word (the hidden-arg
    // register, value irrelevant) so the save-all frame sees the expected
    // return-PC slot and the stack stays 16-byte aligned.
    pushq %rsi                     // Fake that we were called. Use hidden arg.
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                   // Stack should be aligned now.
    movq %rsp, %rsi                           // Pass SP.
    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
    // artDeoptimize long-jumps into the interpreter and never returns.
    call PLT_SYMBOL(artDeoptimize) // artDeoptimize(Thread*, SP)
    int3                           // Unreachable.
END_FUNCTION art_quick_deoptimize
1532
1533
1534    /*
1535     * String's compareTo.
1536     *
1537     * On entry:
1538     *    rdi:   this string object (known non-null)
1539     *    rsi:   comp string object (known non-null)
1540     */
DEFINE_FUNCTION art_quick_string_compareto
    // NOTE(review): all object-field loads below address through 32-bit base
    // registers (%edi/%esi), which truncates pointers to 32 bits — this only
    // works if heap references fit in the low 4 GiB; TODO confirm that
    // assumption holds for this runtime configuration.
    movl STRING_COUNT_OFFSET(%edi), %r8d
    movl STRING_COUNT_OFFSET(%esi), %r9d
    movl STRING_VALUE_OFFSET(%edi), %r10d
    movl STRING_VALUE_OFFSET(%esi), %r11d
    movl STRING_OFFSET_OFFSET(%edi), %eax
    movl STRING_OFFSET_OFFSET(%esi), %ecx
    /* Build pointers to the start of string data */
    // value array base + 2*offset (UTF-16 chars) + header = first char.
    // Note the operands swap: ESI ends up pointing at *this* string's data
    // (from r10d/this) and EDI at the comp string's, matching cmpsw below.
    leal STRING_DATA_OFFSET(%r10d, %eax, 2), %esi
    leal STRING_DATA_OFFSET(%r11d, %ecx, 2), %edi
    /* Calculate min length and count diff */
    movl  %r8d, %ecx
    movl  %r8d, %eax
    subl  %r9d, %eax              // eax = this.count - comp.count
    cmovg %r9d, %ecx              // if this is longer, min length is comp's
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to this string data
     *   edi: pointer to comp string data
     */
    jecxz .Lkeep_length           // min length 0: one is a prefix, return diff
    repe cmpsw                    // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    jne .Lnot_equal
.Lkeep_length:
    // All compared chars equal: result is the length difference in eax.
    ret
    .balign 16
.Lnot_equal:
    // cmpsw advanced esi/edi past the mismatching pair; back up one char.
    movzwl  -2(%esi), %eax        // get last compared char from this string
    movzwl  -2(%edi), %ecx        // get last compared char from comp string
    subl  %ecx, %eax              // return the difference
    ret
END_FUNCTION art_quick_string_compareto
1575
// 16-bit-element memcmp stub: not yet written for x86-64.
UNIMPLEMENTED art_quick_memcmp16
1577
DEFINE_FUNCTION art_quick_assignable_from_code
    // ART's quick ABI treats XMM12-XMM15 as callee-save (see the
    // SETUP/RESTORE_FP_CALLEE_SAVE_FRAME macros at the top of this file),
    // but the native ABI does not — so spill them around the C++ call.
    SETUP_FP_CALLEE_SAVE_FRAME
    call PLT_SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Result (assignability flag) comes back in RAX per the native ABI.
    ret
END_FUNCTION art_quick_assignable_from_code
1584