/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86_64.S"

#include "arch/quick_alloc_entrypoints.S"

MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO

MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO

// For x86_64, the CFA is rsp+8, the address above the pushed return address on the stack.

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAll)
     */
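    /*
     * Frame layout produced by the macro below, derived from its stores
     * (offsets relative to RSP after setup):
     *   0:  ArtMethod*   8: xmm12  16: xmm13  24: xmm14  32: xmm15
     *   40: rbx  48: rbp  56: r12  64: r13  72: r14  80: r15
     *   88: return address (frame size 6 * 8 + 4 * 8 + 8 + 8 = 96)
     */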
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq MACRO_LITERAL(4 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for save all callee save frame method.
    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
     */
MACRO0(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq LITERAL(8 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
     */
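    /*
     * Frame layout produced by the macro below, derived from its stores
     * (offsets relative to RSP after setup):
     *   0-15:   ArtMethod* (two 8-byte slots)
     *   16-79:  xmm0-xmm7 (FPR args)
     *   80-111: xmm12-xmm15
     *   112:    rcx, rdx, rbx, rbp, rsi, r8, r9, r12, r13, r14, r15 (8 bytes each)
     *   200:    return address (frame size 11 * 8 + 4 * 8 + 80 + 8 = 208)
     */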
MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO

MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO


    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
    // (Thread*) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
    UNREACHABLE
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call VAR(cxx_name)                 // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call VAR(cxx_name)                 // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME   // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call VAR(cxx_name)                 // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NoSuchMethodError.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, Thread*, sp)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call VAR(cxx_name)                                     // cxx_name(arg1, arg2, Thread*, SP)
    movq %rax, %rdi                                        // rdi := method to call
    movq %rdx, %rax                                        // rax := code pointer
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
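    /*
     * Illustrative walk-through (not from the original source): given the shorty "JDF"
     * after skipping the return type, one invocation of this macro advances past 'J'
     * (8 bytes of arg_array, no XMM load), then loads the double at 'D' into its
     * xmm_reg and exits; the next invocation loads the float at 'F'. An invocation
     * that reaches '\0' branches to the finished label with its xmm_reg untouched.
     */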

    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
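    /*
     * Illustrative walk-through (not from the original source): given "FJI" after the
     * return type, one invocation of this macro skips the float (4-byte advance), then
     * loads the long at 'J' into its 64-bit register and exits; the next invocation
     * loads the int at 'I' into its 32-bit register. Reaching '\0' branches to the
     * finished label with the register untouched.
     */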

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.
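    // Worked example (illustrative): for a 40-byte arg array, 40 + 100 = 140 rounds
    // down to 128, the total frame size including the return address and the eight
    // saved registers (72 bytes); 128 - 72 = 56 bytes are then reserved for the null
    // ArtMethod* slot, the copied arguments, and padding.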

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or null if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub

    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs
     *   rsi = fprs
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // Pop rip from the new stack.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump

MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
    call VAR(cxx_name)                   // cxx_name(Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call VAR(cxx_name)                   // cxx_name(arg0, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call VAR(cxx_name)                   // cxx_name(arg0, arg1, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    movq 8(%rsp), %rsi                  // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call VAR(cxx_name)                  // cxx_name(arg0, referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    movq 8(%rsp), %rdx                  // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call VAR(cxx_name)                  // (arg0, arg1, referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    movq 8(%rsp), %rcx                  // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call VAR(cxx_name)                  // cxx_name(arg0, arg1, arg2, referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
DEFINE_FUNCTION art_quick_alloc_object_rosalloc
    // Fast path rosalloc allocation.
    // RDI: type_idx, RSI: ArtMethod*, RAX: return value
    // RDX, RCX, R8, R9: free.
    movq   ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx   // Load dex cache resolved types array
                                                              // Load the class (edx)
    movl   0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
    testl  %edx, %edx                                         // Check null class
    jz     .Lart_quick_alloc_object_rosalloc_slow_path
                                                              // Check class status.
    cmpl   LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
    jne    .Lart_quick_alloc_object_rosalloc_slow_path
                                                              // We don't need a fence (between
                                                              // the status and the access flag
                                                              // loads) here because every load is
                                                              // a load acquire on x86.
                                                              // Check access flags has
                                                              // kAccClassIsFinalizable
    testl  LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
    jnz    .Lart_quick_alloc_object_rosalloc_slow_path
                                                              // Check if the thread local
                                                              // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                        // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx     // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lart_quick_alloc_object_rosalloc_slow_path
                                                              // Load the object size
    movl   MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %eax
                                                              // Check if the size is for a thread
                                                              // local allocation
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lart_quick_alloc_object_rosalloc_slow_path
                                                              // Compute the rosalloc bracket index
                                                              // from the size.
                                                              // Align the size up to the rosalloc
                                                              // bracket quantum, divide by the
                                                              // quantum size, and subtract one.
                                                              // The code below is a shorter but
                                                              // equivalent version.
    subq   LITERAL(1), %rax
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
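                                                              // Illustrative example, assuming
                                                              // a 16-byte bracket quantum
                                                              // (shift of 4): a 24-byte object
                                                              // yields (24 - 1) >> 4 = 1, i.e.
                                                              // the second bracket/run.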
                                                              // Load the rosalloc run (r9)
    movq   THREAD_ROSALLOC_RUNS_OFFSET(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                              // Load the free list head (rax). This
                                                              // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lart_quick_alloc_object_rosalloc_slow_path
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                              // Push the new object onto the thread
                                                              // local allocation stack and
                                                              // increment the thread local
                                                              // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                              // Load the next pointer of the head
                                                              // and update the list head with the
                                                              // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                              // Store the class pointer in the
                                                              // header. This also overwrites the
                                                              // next pointer. The offsets are
                                                              // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edx
    movl   %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                              // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                              // No fence necessary for x86.
    ret
.Lart_quick_alloc_object_rosalloc_slow_path:
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx                         // pass Thread::Current()
    call SYMBOL(artAllocObjectFromCodeRosAlloc)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                       // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                   // return or deliver exception
END_FUNCTION art_quick_alloc_object_rosalloc

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
DEFINE_FUNCTION art_quick_alloc_object_tlab
    // Fast path tlab allocation.
    // RDI: uint32_t type_idx, RSI: ArtMethod*
    // RDX, RCX, R8, R9: free. RAX: return val.
    // TODO: Add read barrier when this function is used.
    // Note this function can/should implement read barrier fast path only
    // (no read barrier slow path) because this is the fast path of tlab allocation.
    // We can fall back to the allocation slow path to do the read barrier slow path.
#if defined(USE_READ_BARRIER)
    int3
    int3
#endif
    // Might need a special macro since rsi and edx are 32b/64b mismatched.
    movq ART_METHOD_DEX_CACHE_TYPES_OFFSET_64(%rsi), %rdx  // Load dex cache resolved types array
    // TODO: Add read barrier when this function is used.
    // Might need to break down into multiple instructions to get the base address in a register.
                                                               // Load the class
    movl 0(%rdx, %rdi, COMPRESSED_REFERENCE_SIZE), %edx
    testl %edx, %edx                                           // Check null class
    jz   .Lart_quick_alloc_object_tlab_slow_path
                                                               // Check class status.
    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
    jne  .Lart_quick_alloc_object_tlab_slow_path
                                                               // Check access flags has kAccClassIsFinalizable
    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
    jnz  .Lart_quick_alloc_object_tlab_slow_path
    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8: (size + 7) & ~7.
    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
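    // Illustrative example: a 20-byte object size becomes (20 + 7) & ~7 = 24 above.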
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
    addq %rax, %rcx                                            // Add the object size.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   .Lart_quick_alloc_object_tlab_slow_path
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
                                                               // Store the class pointer in the header.
                                                               // No fence needed for x86.
    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
.Lart_quick_alloc_object_tlab_slow_path:
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME                          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
    call SYMBOL(artAllocObjectFromCodeTLAB)                    // cxx_name(arg0, arg1, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME                        // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
END_FUNCTION art_quick_alloc_object_tlab

GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)

ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER

TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO

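    /*
     * Fast path object locking. A rough sketch of the thin lock word layout relied on
     * below (the authoritative definition is LockWord in lock_word.h): the two highest
     * bits hold the state, bits 28-29 the read barrier state, the owner thread id sits
     * in the low 16 bits (hence the cmpw), and the recursion count lives in between.
     * An unlocked word is zero apart from the read barrier bits.
     */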
DEFINE_FUNCTION art_quick_lock_object
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    movl %edx, %ecx                       // copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set
    jne  .Lslow_lock                      // count overflowed so go slow
    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
    // update lockword, cmpxchg necessary for read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lslow_lock:
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object

DEFINE_FUNCTION art_quick_lock_object_no_inline
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline

DEFINE_FUNCTION art_quick_unlock_object
    testl %edi, %edi                      // null check object/edi
    jz   .Lslow_unlock
.Lretry_unlock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx
    jnz  .Lslow_unlock                    // lock word contains a monitor
    cmpw %cx, %dx                         // does the thread id match?
    jne  .Lslow_unlock
    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
    jae  .Lrecursive_thin_unlock
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
#ifndef USE_READ_BARRIER
    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
#ifndef USE_READ_BARRIER
    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lslow_unlock:
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object

DEFINE_FUNCTION art_quick_unlock_object_no_inline
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline

DEFINE_FUNCTION art_quick_check_cast
    PUSH rdi                          // Save args for exc
    PUSH rsi
    subq LITERAL(8), %rsp             // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
    testq %rax, %rax
    jz 1f                             // jump forward if not assignable
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-24)

    ret

    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
1:
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp             // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi                           // Pop arguments
    POP rdi
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_cast


// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      addq MACRO_LITERAL(8), %rsp
      CFI_ADJUST_CFA_OFFSET(-8)
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO

    /*
     * Macro to insert read barrier, used in art_quick_aput_obj.
     * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
     * 64b PUSH/POP and 32b argument.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     *
     * As with art_quick_aput_obj* functions, the 64b versions are in comments.
     */
MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
#ifdef USE_READ_BARRIER
    PUSH rax                            // save registers that might be used
    PUSH rdi
    PUSH rsi
    PUSH rdx
    PUSH rcx
    SETUP_FP_CALLEE_SAVE_FRAME
    // Outgoing argument set up
    // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
    // // movq REG_VAR(ref_reg64), %rdi
    movl REG_VAR(obj_reg), %esi         // pass obj_reg
    // movq REG_VAR(obj_reg), %rsi
    movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
    call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
    .ifnc RAW_VAR(dest_reg32), eax
    // .ifnc RAW_VAR(dest_reg64), rax
      movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
      // movq %rax, REG_VAR(dest_reg64)
    .endif
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
    POP_REG_NE rdx, RAW_VAR(dest_reg64)
    POP_REG_NE rsi, RAW_VAR(dest_reg64)
    POP_REG_NE rdi, RAW_VAR(dest_reg64)
    POP_REG_NE rax, RAW_VAR(dest_reg64)
#else
    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
    UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
#endif  // USE_READ_BARRIER
END_MACRO

    /*
     * Entry from managed code for array put operations of objects where the value being stored
     * needs to be checked for compatibility.
     *
     * Currently all the parameters should fit into the 32b portions of the registers. Index always
     * will. So we optimize for a tighter encoding. The 64b versions are in comments.
     *
     * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
     */
DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
#if defined(__APPLE__)
    int3
    int3
#else
    testl %edi, %edi
//  testq %rdi, %rdi
    jnz art_quick_aput_obj_with_bound_check
    jmp art_quick_throw_null_pointer_exception
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_null_and_bound_check


DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
#if defined(__APPLE__)
    int3
    int3
#else
    movl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %ecx
//  movl MIRROR_ARRAY_LENGTH_OFFSET(%rdi), %ecx  // This zero-extends, so value(%rcx)=value(%ecx)
    cmpl %ecx, %esi
    jb art_quick_aput_obj
    mov %esi, %edi
//  mov %rsi, %rdi
    mov %ecx, %esi
//  mov %rcx, %rsi
    jmp art_quick_throw_array_bounds
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_bound_check


DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
    cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
#else
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
#endif
    jne .Lcheck_assignability
.Ldo_aput:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
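    // Mark the card for the updated array (illustrative note): obj >> 7 is the card
    // index, and %dl, the low byte of the card table base, doubles as the dirty-card
    // value because the runtime biases the table so the two agree.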
1200    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
1201    shrl LITERAL(7), %edi
1202//  shrl LITERAL(7), %rdi
1203    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
1204    ret
1205.Ldo_aput_null:
1206    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1207//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1208    ret
1209.Lcheck_assignability:
1210    // Save arguments.
1211    PUSH rdi
1212    PUSH rsi
1213    PUSH rdx
1214    SETUP_FP_CALLEE_SAVE_FRAME
1215
1216#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
1217    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
1218    movl %eax, %esi               // Pass arg2 = value's class.
1219    // movq %rax, %rsi
1220#else
1221                                     // "Uncompress" = do nothing, as already zero-extended on load.
1222    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
1223#endif
1224    movq %rcx, %rdi               // Pass arg1 = array's component type.
1225
1226    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
1227
1228    // Exception?
1229    testq %rax, %rax
1230    jz   .Lthrow_array_store_exception
1231
1232    RESTORE_FP_CALLEE_SAVE_FRAME
1233    // Restore arguments.
1234    POP  rdx
1235    POP  rsi
1236    POP  rdi
1237
1238    POISON_HEAP_REF edx
1239    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
1240//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
1241    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
1242    shrl LITERAL(7), %edi
1243//  shrl LITERAL(7), %rdi
1244    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_aput_obj
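
    /*
     * For orientation, a hedged C-like sketch of the store implemented by the
     * three stubs above (illustrative only; field and helper names such as
     * ThrowNpe, ThrowBounds and MarkCard are assumptions, not runtime API --
     * only artIsAssignableFromCode and artThrowArrayStoreException are the
     * entry points actually called):
     *
     *   void AputObj(Array* array, int32_t index, Object* value) {
     *     if (array == NULL) ThrowNpe();                            // null check stub
     *     if ((uint32_t)index >= (uint32_t)array->length) ThrowBounds();  // bound check stub
     *     if (value != NULL) {
     *       Class* component = array->klass->component_type;
     *       if (value->klass != component &&                        // trivial assignability
     *           !artIsAssignableFromCode(component, value->klass)) {
     *         artThrowArrayStoreException(array, value, self);      // does not return
     *       }
     *     }
     *     array->data[index] = value;            // 32-bit compressed reference store
     *     if (value != NULL) MarkCard(array);    // card-table barrier (shrl $7 + movb)
     *   }
     */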

// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy

NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret

UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO

TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO

ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

// This is singled out as the argument order is different.
DEFINE_FUNCTION art_quick_set64_static
                                         // new_val is already in %rdx
    movq 8(%rsp), %rsi                   // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                         // field_idx is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rcx    // pass Thread::Current()
    call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    RETURN_IF_EAX_ZERO                   // return or deliver exception
END_FUNCTION art_quick_set64_static
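
    // In C terms, the downcall above is (sketch; signature taken from the
    // call comment, frame setup/teardown elided):
    //   artSet64StaticFromCode(/*rdi*/ field_idx, /*rsi*/ referrer,
    //                          /*rdx*/ new_val, /*rcx*/ Thread::Current());
    // new_val stays in rdx and the referrer is passed second, an argument
    // order the generic downcall macros above do not produce.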


DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler

    /*
     * Called to resolve an IMT conflict.
     * rdi is the conflict ArtMethod.
     * rax is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to r10 and rdi.
     */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    movq __SIZEOF_POINTER__(%rsp), %r10 // Load referrer
    movq ART_METHOD_DEX_CACHE_METHODS_OFFSET_64(%r10), %r10   // Load dex cache methods array
    movq 0(%r10, %rax, __SIZEOF_POINTER__), %r10 // Load interface method
    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
.Limt_table_iterate:
    cmpq %r10, 0(%rdi)
    jne .Limt_table_next_entry
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    movq __SIZEOF_POINTER__(%rdi), %rdi
    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
.Limt_table_next_entry:
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cmpq LITERAL(0), 0(%rdi)
    jz .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
    jmp .Limt_table_iterate
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
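
    /*
     * Hedged C-like sketch of the table walk above (illustrative only; the
     * struct and function names are assumptions about the layout the assembly
     * relies on: [interface method, target method] pointer pairs terminated
     * by a null first slot):
     *
     *   struct ImtEntry { ArtMethod* interface_method; ArtMethod* target; };
     *   const void* Lookup(ImtEntry* table, ArtMethod* interface_method) {
     *     for (ImtEntry* e = table; e->interface_method != NULL; ++e) {
     *       if (e->interface_method == interface_method) {
     *         return e->target->quick_code;  // jmp target on a hit
     *       }
     *     }
     *     return NULL;  // miss: artInvokeInterfaceTrampoline populates the table
     *   }
     */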

DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
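
    // Control flow above in C-like terms (sketch; frame handling elided):
    //   const void* code = artQuickResolutionTrampoline(called, receiver, self, sp);
    //   if (code != NULL) ((void (*)())code)();  // tail call the resolved method
    //   else DeliverPendingException();          // resolution threw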

/* Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------#    <--- SP on entry
 *
 *          |
 *          V
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | R15               |    callee save
 * | R14               |    callee save
 * | R13               |    callee save
 * | R12               |    callee save
 * | R9                |    arg5
 * | R8                |    arg4
 * | RSI/R6            |    arg1
 * | RBP/R5            |    callee save
 * | RBX/R3            |    callee save
 * | RDX/R2            |    arg2
 * | RCX/R1            |    arg3
 * | XMM7              |    float arg 8
 * | XMM6              |    float arg 7
 * | XMM5              |    float arg 6
 * | XMM4              |    float arg 5
 * | XMM3              |    float arg 4
 * | XMM2              |    float arg 3
 * | XMM1              |    float arg 2
 * | XMM0              |    float arg 1
 * | RDI/Method*       |  <- sp
 * #-------------------#
 * | Scratch Alloca    |    5K scratch space
 * #---------#---------#
 * |         | sp*     |
 * | Tramp.  #---------#
 * | args    | thread  |
 * | Tramp.  #---------#
 * |         | method  |
 * #-------------------#    <--- SP on artQuickGenericJniTrampoline
 *
 *           |
 *           v              artQuickGenericJniTrampoline
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | Callee-Save Data  |
 * #-------------------#
 * | handle scope      |
 * #-------------------#
 * | Method*           |    <--- (1)
 * #-------------------#
 * | local ref cookie  | // 4B
 * | handle scope size | // 4B   TODO: roll into call stack alignment?
 * #-------------------#
 * | JNI Call Stack    |
 * #-------------------#    <--- SP on native call
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 * | Native code ptr   |
 * #-------------------#
 * | Free scratch      |
 * #-------------------#
 * | Ptr to (1)        |    <--- RSP
 * #-------------------#
 */
    /*
     * Called to do a generic JNI down-call
     */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI

    movq %rsp, %rbp                 // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)

    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-2-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    //
    // New test: use 5K and release
    // 5k = 5120
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*,  SP)
    //    rdi    rsi      <= C calling convention
    //  gs:...   rbp      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rbp, %rsi
    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    test %rax, %rax
    jz .Lexception_in_native

    // Release part of the alloca.
    movq %rdx, %rsp

    // Pop the outgoing native arguments from the register-passing alloca
    // region: the six GPR args first, then the eight FPR args, matching the
    // layout written by artQuickGenericJniTrampoline.
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    addq LITERAL(64), %rsp          // floating-point done

    // native call
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi   rdx       <= C calling convention
    //  gs:...    rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call SYMBOL(artQuickGenericJniEndTrampoline)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Tear down the callee-save frame.
    // Load FPRs.
    // Skip xmm0 at 16(%rsp): it holds the (possibly floating-point) return
    // value and must not be clobbered.
    movq 24(%rsp), %xmm1            // Reloading the caller-save FPR args is
                                    // not strictly necessary, but harmless.
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // This addq was 80 bytes before the four ART FP callee saves (xmm12-xmm15) were appended.
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee saves and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.
    // store into fpr, for when it's a fpr return...
    movq %rax, %xmm0
    ret
.Lexception_in_native:
    movq %gs:THREAD_TOP_QUICK_FRAME_OFFSET, %rsp
    CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a new save-all frame required by the runtime.
    call .Lexception_call
.Lexception_call:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
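
    /*
     * Hedged C-like outline of the trampoline above (local names are
     * illustrative; the entry points are the ones actually called):
     *
     *   void* code = artQuickGenericJniTrampoline(self, sp);   // returned in rax
     *   void* used_bottom = result_in_rdx;                     // bottom of used alloca
     *   if (code == NULL) goto exception_in_native;
     *   rsp = used_bottom;              // release the unused part of the alloca
     *   // pop rdi..r9 and xmm0..xmm7 from the register-passing area, then:
     *   uint64_t gpr_result = call(code);                      // the native call
     *   uint64_t ret = artQuickGenericJniEndTrampoline(self, gpr_result, fpr_result);
     *   if (self->exception != NULL) goto exception_in_native; // pending exception?
     *   return ret;  // mirrored into both rax and xmm0 by the epilogue
     */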

    /*
     * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
     * of a quick call:
     * RDI = method being called / to bridge to.
     * RSI, RDX, RCX, R8, R9 are arguments to that method.
     */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
    movq %rsp, %rdx                        // RDX := sp
    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                   // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge
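
    // In C terms (sketch): the interpreter executes the method and the
    // integer result is mirrored into xmm0 so that either return-register
    // convention is satisfied:
    //   uint64_t result = artQuickToInterpreterBridge(method, self, sp);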

    /*
     * Routine that intercepts method calls and returns.
     */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME

    movq %rdi, %r12               // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %rcx   // Pass return PC.

    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)

                                  // %rax = result of call.
    movq %r12, %rdi               // Reload method pointer.

    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.

    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME

    jmp *%rax                     // Tail call to intended method.
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry

DEFINE_FUNCTION art_quick_instrumentation_exit
    pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.

    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME

    // We need to save rax and xmm0. We could use a callee save from
    // SETUP_REFS_ONLY_CALLEE_SAVE_FRAME, but then we would need to fully restore it. As there are
    // a good number of callee-save registers, it seems easier to have an extra small stack area.
    // But this should be revisited.

    movq  %rsp, %rsi                          // Pass SP.

    PUSH rax                  // Save integer result.
    subq LITERAL(8), %rsp     // Save floating-point result.
    CFI_ADJUST_CFA_OFFSET(8)
    movq %xmm0, (%rsp)

    movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
    movq  %rax, %rdx                          // Pass integer result.
    movq  %xmm0, %rcx                         // Pass floating-point result.

    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)

    movq  %rax, %rdi          // Store return PC
    movq  %rdx, %rsi          // Store second return PC in hidden arg.

    movq (%rsp), %xmm0        // Restore floating-point result.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rax                   // Restore integer result.

    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME

    addq LITERAL(8), %rsp     // Drop fake return pc.

    jmp   *%rdi               // Return.
END_FUNCTION art_quick_instrumentation_exit
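
    // In C-like terms (sketch; the pair type is illustrative): the runtime
    // returns two PCs packed in rax/rdx; rax is jumped to and rdx is passed
    // along in the hidden-argument register rsi:
    //   pc_pair = artInstrumentationMethodExitFromCode(self, sp, gpr_res, fpr_res);
    //   rdi = pc_pair.first; rsi = pc_pair.second; jump_to(rdi);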

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
DEFINE_FUNCTION art_quick_deoptimize
    pushq %rsi                         // Entry point for a jump. Fake that we were called.
                                       // Use hidden arg.
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                       // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
    call SYMBOL(artDeoptimize)         // artDeoptimize(Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the interpreter bridge.
     */
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                                // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi           // Pass Thread.
    call SYMBOL(artDeoptimizeFromCompiledCode)  // artDeoptimizeFromCompiledCode(Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code

    /*
     * String's compareTo.
     *
     * On entry:
     *    rdi:   this string object (known non-null)
     *    rsi:   comp string object (known non-null)
     */
DEFINE_FUNCTION art_quick_string_compareto
    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
    /* Build pointers to the start of string data */
    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
    /* Calculate min length and count diff */
    movl  %r8d, %ecx
    movl  %r8d, %eax
    subl  %r9d, %eax
    cmovg %r9d, %ecx
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to comp string data
     *   edi: pointer to this string data
     */
    jecxz .Lkeep_length
    repe cmpsw                    // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    jne .Lnot_equal
.Lkeep_length:
    ret
    .balign 16
.Lnot_equal:
    movzwl  -2(%edi), %eax        // get last compared char from this string
    movzwl  -2(%esi), %ecx        // get last compared char from comp string
    subl  %ecx, %eax              // return the difference
    ret
END_FUNCTION art_quick_string_compareto
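
    /*
     * Equivalent C sketch of the comparison above (semantics only; the stub
     * implements the loop with repe cmpsw over the UTF-16 data):
     *
     *   int32_t CompareTo(const uint16_t* this_data, int32_t this_len,
     *                     const uint16_t* comp_data, int32_t comp_len) {
     *     int32_t min_len = (this_len < comp_len) ? this_len : comp_len;
     *     for (int32_t i = 0; i < min_len; ++i) {
     *       if (this_data[i] != comp_data[i]) {
     *         return (int32_t)this_data[i] - (int32_t)comp_data[i];
     *       }
     *     }
     *     return this_len - comp_len;  // equal prefixes: length difference
     *   }
     */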

UNIMPLEMENTED art_quick_memcmp16

DEFINE_FUNCTION art_quick_assignable_from_code
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                      // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_assignable_from_code


// Return from a nested signal:
// Entry:
//  rdi: address of jmp_buf in TLS

DEFINE_FUNCTION art_nested_signal_return
                                    // first arg to longjmp is already in correct register
    movq LITERAL(1), %rsi           // second arg to longjmp (1)
    call PLT_SYMBOL(longjmp)
    UNREACHABLE
END_FUNCTION art_nested_signal_return

DEFINE_FUNCTION art_quick_read_barrier_mark
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_mark

DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow

DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow

    /*
     * On stack replacement stub.
     * On entry:
     *   [sp] = return address
     *   rdi = stack to copy
     *   rsi = size of stack
     *   rdx = pc to call
     *   rcx = JValue* result
     *   r8 = shorty
     *   r9 = thread
     *
     * Note that the native C ABI already aligns the stack to 16 bytes.
     */
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles.
    PUSH rbp                      // Save rbp.
    PUSH rcx                      // Save rcx/result*.
    PUSH r8                       // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)              // Push null for ArtMethod*.
    movl %esi, %ecx               // rcx := size of stack
    movq %rdi, %rsi               // rsi := stack to copy
    call .Losr_entry

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    cmpb LITERAL(68), (%r8)        // Test if result type char == 'D'.
    je .Losr_return_double_quick
    cmpb LITERAL(70), (%r8)        // Test if result type char == 'F'.
    je .Losr_return_float_quick
    movq %rax, (%rcx)              // Store the result assuming it's a long, int or Object*.
    ret
.Losr_return_double_quick:
    movsd %xmm0, (%rcx)            // Store the double floating point result.
    ret
.Losr_return_float_quick:
    movss %xmm0, (%rcx)            // Store the floating point result.
    ret
.Losr_entry:
    subl LITERAL(8), %ecx         // The given stack size includes the pushed frame pointer; subtract it.
    subq %rcx, %rsp
    movq %rsp, %rdi               // rdi := beginning of stack
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx
END_FUNCTION art_quick_osr_stub

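    /*
     * Hedged C-like sketch of the stub above (illustrative only; 'D' and 'F'
     * are the shorty characters tested with cmpb $68/$70):
     *
     *   // .Losr_entry: copy the interpreter frame onto our stack, then jump.
     *   size -= 8;                       // the size includes the pushed rbp
     *   rsp -= size;
     *   memcpy(rsp, stack, size);        // rep movsb
     *   jump_to(pc);                     // enter the OSR-compiled code
     *
     *   // After it returns, store the result according to the shorty:
     *   switch (shorty[0]) {
     *     case 'D': *(double*)result = xmm0; break;
     *     case 'F': *(float*)result  = xmm0; break;
     *     default:  *(uint64_t*)result = rax;  // long, int or Object*
     *   }
     */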