// quick_entrypoints_arm64.S revision 0a87a653a296854c9a0abacd9bb1557ee4c4d05d
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm64.S"

#include "arch/quick_alloc_entrypoints.S"

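// Note: each SP adjustment or register spill in the helper macros below is
// paired with the matching .cfi_* directive so the unwinder can keep tracking
// the CFA and the saved registers through these hand-written frames.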
.macro INCREASE_FRAME frame_adjustment
    sub sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset (\frame_adjustment)
.endm

.macro DECREASE_FRAME frame_adjustment
    add sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

.macro SAVE_REG reg, offset
    str \reg, [sp, #(\offset)]
    .cfi_rel_offset \reg, (\offset)
.endm

.macro RESTORE_REG reg, offset
    ldr \reg, [sp, #(\offset)]
    .cfi_restore \reg
.endm

.macro SAVE_REG_INCREASE_FRAME reg, frame_adjustment
    str \reg, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg, 0
.endm

.macro RESTORE_REG_DECREASE_FRAME reg, frame_adjustment
    ldr \reg, [sp], #(\frame_adjustment)
    .cfi_restore \reg
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

.macro SAVE_TWO_REGS reg1, reg2, offset
    stp \reg1, \reg2, [sp, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm

.macro RESTORE_TWO_REGS reg1, reg2, offset
    ldp \reg1, \reg2, [sp, #(\offset)]
    .cfi_restore \reg1
    .cfi_restore \reg2
.endm

.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg1, 0
    .cfi_rel_offset \reg2, 8
.endm

.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
    .cfi_restore \reg1
    .cfi_restore \reg2
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]

    INCREASE_FRAME 176

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP callee-saves.
    stp d8, d9,   [sp, #16]
    stp d10, d11, [sp, #32]
    stp d12, d13, [sp, #48]
    stp d14, d15, [sp, #64]

    // GP callee-saves
    SAVE_TWO_REGS x19, x20, 80
    SAVE_TWO_REGS x21, x22, 96
    SAVE_TWO_REGS x23, x24, 112
    SAVE_TWO_REGS x25, x26, 128
    SAVE_TWO_REGS x27, x28, 144
    SAVE_TWO_REGS x29, xLR, 160

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;
    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]

    INCREASE_FRAME 96

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
#endif

    // GP callee-saves.
    // x20 paired with ArtMethod* - see below.
    SAVE_TWO_REGS x21, x22, 16
    SAVE_TWO_REGS x23, x24, 32
    SAVE_TWO_REGS x25, x26, 48
    SAVE_TWO_REGS x27, x28, 64
    SAVE_TWO_REGS x29, xLR, 80

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    stp xIP0, x20, [sp]
    .cfi_rel_offset x20, 8

    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_ONLY_FRAME
    // Callee-saves.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_REG x20, 8
    RESTORE_TWO_REGS x21, x22, 16
    RESTORE_TWO_REGS x23, x24, 32
    RESTORE_TWO_REGS x25, x26, 48
    RESTORE_TWO_REGS x27, x28, 64
    RESTORE_TWO_REGS x29, xLR, 80

    DECREASE_FRAME 96
.endm

.macro POP_SAVE_REFS_ONLY_FRAME
    DECREASE_FRAME 96
.endm


.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    INCREASE_FRAME 224

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP args.
    stp d0, d1, [sp, #16]
    stp d2, d3, [sp, #32]
    stp d4, d5, [sp, #48]
    stp d6, d7, [sp, #64]

    // Core args.
    SAVE_TWO_REGS x1, x2, 80
    SAVE_TWO_REGS x3, x4, 96
    SAVE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    SAVE_TWO_REGS x7, x20, 128
    SAVE_TWO_REGS x21, x22, 144
    SAVE_TWO_REGS x23, x24, 160
    SAVE_TWO_REGS x25, x26, 176
    SAVE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    SAVE_TWO_REGS x29, xLR, 208

.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     *
     * TODO This is probably too conservative - saving FP & LR.
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]

    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL

    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    // FP args.
    ldp d0, d1, [sp, #16]
    ldp d2, d3, [sp, #32]
    ldp d4, d5, [sp, #48]
    ldp d6, d7, [sp, #64]

    // Core args.
    RESTORE_TWO_REGS x1, x2, 80
    RESTORE_TWO_REGS x3, x4, 96
    RESTORE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_TWO_REGS x7, x20, 128
    RESTORE_TWO_REGS x21, x22, 144
    RESTORE_TWO_REGS x23, x24, 160
    RESTORE_TWO_REGS x25, x26, 176
    RESTORE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    RESTORE_TWO_REGS x29, xLR, 208

    DECREASE_FRAME 224
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
     * and saving registers x29 and LR is handled elsewhere.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif

    // Save FP registers.
    // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned.
    str d0,       [sp, #8]
    stp d1, d2,   [sp, #16]
    stp d3, d4,   [sp, #32]
    stp d5, d6,   [sp, #48]
    stp d7, d8,   [sp, #64]
    stp d9, d10,  [sp, #80]
    stp d11, d12, [sp, #96]
    stp d13, d14, [sp, #112]
    stp d15, d16, [sp, #128]
    stp d17, d18, [sp, #144]
    stp d19, d20, [sp, #160]
    stp d21, d22, [sp, #176]
    stp d23, d24, [sp, #192]
    stp d25, d26, [sp, #208]
    stp d27, d28, [sp, #224]
    stp d29, d30, [sp, #240]
    str d31,      [sp, #256]

    // Save core registers.
    SAVE_REG            x0, 264
    SAVE_TWO_REGS  x1,  x2, 272
    SAVE_TWO_REGS  x3,  x4, 288
    SAVE_TWO_REGS  x5,  x6, 304
    SAVE_TWO_REGS  x7,  x8, 320
    SAVE_TWO_REGS  x9, x10, 336
    SAVE_TWO_REGS x11, x12, 352
    SAVE_TWO_REGS x13, x14, 368
    SAVE_TWO_REGS x15, x16, 384
    SAVE_TWO_REGS x17, x18, 400
    SAVE_TWO_REGS x19, x20, 416
    SAVE_TWO_REGS x21, x22, 432
    SAVE_TWO_REGS x23, x24, 448
    SAVE_TWO_REGS x25, x26, 464
    SAVE_TWO_REGS x27, x28, 480

    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
    ldr xIP0, [xIP0, \runtime_method_offset]

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    INCREASE_FRAME 512
    SAVE_TWO_REGS x29, xLR, 496
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    // Restore FP registers.
    // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
    ldr d0,       [sp, #8]
    ldp d1, d2,   [sp, #16]
    ldp d3, d4,   [sp, #32]
    ldp d5, d6,   [sp, #48]
    ldp d7, d8,   [sp, #64]
    ldp d9, d10,  [sp, #80]
    ldp d11, d12, [sp, #96]
    ldp d13, d14, [sp, #112]
    ldp d15, d16, [sp, #128]
    ldp d17, d18, [sp, #144]
    ldp d19, d20, [sp, #160]
    ldp d21, d22, [sp, #176]
    ldp d23, d24, [sp, #192]
    ldp d25, d26, [sp, #208]
    ldp d27, d28, [sp, #224]
    ldp d29, d30, [sp, #240]
    ldr d31,      [sp, #256]

    // Restore core registers, except x0.
    RESTORE_TWO_REGS  x1,  x2, 272
    RESTORE_TWO_REGS  x3,  x4, 288
    RESTORE_TWO_REGS  x5,  x6, 304
    RESTORE_TWO_REGS  x7,  x8, 320
    RESTORE_TWO_REGS  x9, x10, 336
    RESTORE_TWO_REGS x11, x12, 352
    RESTORE_TWO_REGS x13, x14, 368
    RESTORE_TWO_REGS x15, x16, 384
    RESTORE_TWO_REGS x17, x18, 400
    RESTORE_TWO_REGS x19, x20, 416
    RESTORE_TWO_REGS x21, x22, 432
    RESTORE_TWO_REGS x23, x24, 448
    RESTORE_TWO_REGS x25, x26, 464
    RESTORE_TWO_REGS x27, x28, 480
    RESTORE_TWO_REGS x29, xLR, 496

    DECREASE_FRAME 512
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    RESTORE_REG  x0, 264
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
.endm

// Macro to refresh the Marking Register (W20).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr wMR, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz x0, 1f                // result non-zero branch over
    ret                        // return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz x0, 1f                 // result zero branch over
    ret                        // return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov x0, xSELF

    // Point of no return.
    bl artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
    brk 0  // Unreached
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    ret
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
.endm

// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
.endm

.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
    cbnz w0, 1f                // result non-zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
    mov x1, xSELF                     // pass Thread::Current.
    bl  \cxx_name                     // \cxx_name(arg, Thread*).
    brk 0
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
    brk 0
END \c_name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    // Save all registers as basis for long jump context.
    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
    mov x1, xSELF                     // pass Thread::Current.
    bl  artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
    brk 0
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/x1.
     *
     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
     * of the target Method* in x0 and method->code_ in x1.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Adapted from ARM32 code.
     *
     * Clobbers xIP0.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
    // The helper signature is always
    // (method_idx, this_object, Thread*, sp).

    mov    x2, xSELF                      // pass Thread::Current
    mov    x3, sp
    bl     \cxx_name                      // (method_idx, this, Thread*, SP)
    mov    xIP0, x1                       // save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
    br     xIP0                           // tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


.macro INVOKE_STUB_CREATE_FRAME

SAVE_SIZE=15*8   // x4, x5, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
SAVE_SIZE_AND_METHOD=SAVE_SIZE+8


    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    add x10, x2, # SAVE_SIZE_AND_METHOD    // calculate size of frame.
    sub x10, sp, x10                       // Calculate SP position - saves + ArtMethod* + args
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    sub x10, x9, #SAVE_SIZE                // Calculate new FP (later). Done here as we must move SP
    .cfi_def_cfa_register x10              // before this.
    .cfi_adjust_cfa_offset SAVE_SIZE

    str x28, [x10, #112]
    .cfi_rel_offset x28, 112

    stp x26, x27, [x10, #96]
    .cfi_rel_offset x26, 96
    .cfi_rel_offset x27, 104

    stp x24, x25, [x10, #80]
    .cfi_rel_offset x24, 80
    .cfi_rel_offset x25, 88

    stp x22, x23, [x10, #64]
    .cfi_rel_offset x22, 64
    .cfi_rel_offset x23, 72

    stp x20, x21, [x10, #48]
    .cfi_rel_offset x20, 48
    .cfi_rel_offset x21, 56

    stp x9, x19, [x10, #32]                // Save old stack pointer and x19.
    .cfi_rel_offset sp, 32
    .cfi_rel_offset x19, 40

    stp x4, x5, [x10, #16]                 // Save result and shorty addresses.
    .cfi_rel_offset x4, 16
    .cfi_rel_offset x5, 24

    stp xFP, xLR, [x10]                    // Store LR & FP.
    .cfi_rel_offset x29, 0
    .cfi_rel_offset x30, 8

    mov xFP, x10                           // Use xFP now, as it's callee-saved.
    .cfi_def_cfa_register x29
    mov xSELF, x3                          // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                         // Destination address is bottom of stack + null.

    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
1:
    cmp w2, #0
    beq 2f
    sub w2, w2, #4      // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]

    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm

.macro INVOKE_STUB_CALL_AND_RETURN

    REFRESH_MARKING_REGISTER

    // load method-> METHOD_QUICK_CODE_OFFSET
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
    blr x9

    // Restore return value address and shorty address.
    ldp x4, x5, [xFP, #16]
    .cfi_restore x4
    .cfi_restore x5

    ldr x28, [xFP, #112]
    .cfi_restore x28

    ldp x26, x27, [xFP, #96]
    .cfi_restore x26
    .cfi_restore x27

    ldp x24, x25, [xFP, #80]
    .cfi_restore x24
    .cfi_restore x25

    ldp x22, x23, [xFP, #64]
    .cfi_restore x22
    .cfi_restore x23

    ldp x20, x21, [xFP, #48]
    .cfi_restore x20
    .cfi_restore x21

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x5]

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq 3f

    // Is it a double?
    cmp w10, #'D'
    bne 1f
    str d0, [x4]
    b 3f

1:  // Is it a float?
    cmp w10, #'F'
    bne 2f
    str s0, [x4]
    b 3f

2:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

3:  // Finish up.
    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
    .cfi_restore x19
    mov sp, x2
    .cfi_restore sp

    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
    .cfi_restore x29
    .cfi_restore x30

    ret

.endm


/*
 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 *  +----------------------+
 *  |                      |
 *  |  C/C++ frame         |
 *  |       LR''           |
 *  |       FP''           | <- SP'
 *  +----------------------+
 *  +----------------------+
 *  |        x28           | <- TODO: Remove callee-saves.
 *  |         :            |
 *  |        x19           |
 *  |        SP'           |
 *  |        X5            |
 *  |        X4            |        Saved registers
 *  |        LR'           |
 *  |        FP'           | <- FP
 *  +----------------------+
 *  | uint32_t out[n-1]    |
 *  |    :      :          |        Outs
 *  | uint32_t out[0]      |
 *  | ArtMethod*           | <- SP  value=null
 *  +----------------------+
 *
 * Outgoing registers:
 *  x0    - Method*
 *  x1-x7 - integer parameters.
 *  d0-d7 - Floating point parameters.
 *  xSELF = self
 *  SP = & of ArtMethod*
 *  x1 = "this" pointer.
 *
 */
ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW2
    adr  x12, .LstoreX2
    adr  x13, .LstoreS0
    adr  x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1         // Load shorty address, plus one to skip return value.
    ldr w1, [x9], #4        // Load "this" parameter, and increment arg pointer.
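    // x1 now holds "this", so only six GPR argument slots (w2-w7) remain; the
    // integer tables below therefore start at w2/x2, and the "registers full"
    // checks compare the x8 table offset against 6*12.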

    // Loop to fill registers.
.LfillRegisters:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F' // is this a float?
    bne .LisDouble

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble:
    cmp w17, #'D'           // is this a double?
    bne .LisLong

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'           // is this a long?
    bne .LisOther

    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther:                  // Everything else takes one vReg.
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4:
    add x9, x9, #4
    b .LfillRegisters

.Ladvance8:
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
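//  return - the label to branch back to after the load.
// Each LOADREG expansion is three instructions (12 bytes), which is why the
// counter advances by 12 per load and the dispatch code above compares the
// counter against N*12 to detect that all argument registers are full.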
.macro LOADREG counter size register return
    ldr \register , [x9], #\size
    add \counter, \counter, 12
    b \return
.endm

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub

/*  extern"C"
 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW1_2
    adr  x12, .LstoreX1_2
    adr  x13, .LstoreS0_2
    adr  x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1     // Load shorty address, plus one to skip return value.
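    // Static methods have no implicit "this", so all seven GPR argument slots
    // (w1-w7) are available; the tables below start at w1/x1 and the
    // "registers full" checks compare the x8 table offset against 7*12.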

    // Loop to fill registers.
.LfillRegisters2:
    ldrb w17, [x10], #1         // Load next character in signature, and increment.
    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F'          // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble2:
    cmp w17, #'D'           // is this a double?
    bne .LisLong2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'           // is this a long?
    bne .LisOther2

    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther2:                 // Everything else takes one vReg.
    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub



/*  extern"C" void art_quick_osr_stub(void** stack,                x0
 *                                    size_t stack_size_in_bytes,  x1
 *                                    const uint8_t* native_pc,    x2
 *                                    JValue *result,              x3
 *                                    char   *shorty,              x4
 *                                    Thread *self)                x5
 */
ENTRY art_quick_osr_stub
SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    sub x10, sp, # SAVE_SIZE
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    str x28, [sp, #112]
    stp x26, x27, [sp, #96]
    stp x24, x25, [sp, #80]
    stp x22, x23, [sp, #64]
    stp x20, x21, [sp, #48]
    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
    stp xFP, xLR, [sp]                    // Store LR & FP.
    mov xSELF, x5                         // Move thread pointer into SELF register.
    REFRESH_MARKING_REGISTER

    sub sp, sp, #16
    str xzr, [sp]                         // Store null for ArtMethod* slot
    // Branch to stub.
    bl .Losr_entry
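    // (The bl sets LR to the instruction below; .Losr_entry plants that LR in
    // the copied frame, so the OSR-compiled code eventually returns here.)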
    add sp, sp, #16

    // Restore return value address and shorty address.
    ldp x3, x4, [sp, #16]
    ldr x28, [sp, #112]
    ldp x26, x27, [sp, #96]
    ldp x24, x25, [sp, #80]
    ldp x22, x23, [sp, #64]
    ldp x20, x21, [sp, #48]

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x4]

    // Check the return type and store the correct register into the jvalue in memory.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq .Losr_exit

    // Is it a double?
    cmp w10, #'D'
    bne .Lno_double
    str d0, [x3]
    b .Losr_exit

.Lno_double:  // Is it a float?
    cmp w10, #'F'
    bne .Lno_float
    str s0, [x3]
    b .Losr_exit

.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x3]

.Losr_exit:  // Finish up.
    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
    mov sp, x2
    ret

.Losr_entry:
    // Update stack pointer for the callee
    sub sp, sp, x1

    // Update link register slot expected by the callee.
    sub w1, w1, #8
    str lr, [sp, x1]

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X0 - source address
    // W1 - args length
    // SP - destination address.
    // W10 - temporary
.Losr_loop_entry:
    cmp w1, #0
    beq .Losr_loop_exit
    sub w1, w1, #4
    ldr w10, [x0, x1]
    str w10, [sp, x1]
    b .Losr_loop_entry

.Losr_loop_exit:
    // Branch to the OSR entry point.
    br x2

END art_quick_osr_stub

    /*
     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
     */
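    // gprs_ layout, as consumed below: x0..x30 at indices 0-30, SP at index 31,
    // a slot for the unused XZR at index 32, and the PC to jump to at index 33.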

ENTRY art_quick_do_long_jump
    // Load FPRs
    ldp d0, d1, [x1], #16
    ldp d2, d3, [x1], #16
    ldp d4, d5, [x1], #16
    ldp d6, d7, [x1], #16
    ldp d8, d9, [x1], #16
    ldp d10, d11, [x1], #16
    ldp d12, d13, [x1], #16
    ldp d14, d15, [x1], #16
    ldp d16, d17, [x1], #16
    ldp d18, d19, [x1], #16
    ldp d20, d21, [x1], #16
    ldp d22, d23, [x1], #16
    ldp d24, d25, [x1], #16
    ldp d26, d27, [x1], #16
    ldp d28, d29, [x1], #16
    ldp d30, d31, [x1]

    // Load GPRs
    // TODO: lots of those are smashed, could optimize.
    add x0, x0, #30*8
    ldp x30, x1, [x0], #-16          // LR & SP
    ldp x28, x29, [x0], #-16
    ldp x26, x27, [x0], #-16
    ldp x24, x25, [x0], #-16
    ldp x22, x23, [x0], #-16
    ldp x20, x21, [x0], #-16
    ldp x18, x19, [x0], #-16         // X18 & xSELF
    ldp x16, x17, [x0], #-16
    ldp x14, x15, [x0], #-16
    ldp x12, x13, [x0], #-16
    ldp x10, x11, [x0], #-16
    ldp x8, x9, [x0], #-16
    ldp x6, x7, [x0], #-16
    ldp x4, x5, [x0], #-16
    ldp x2, x3, [x0], #-16
    mov sp, x1

    REFRESH_MARKING_REGISTER

    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
    ldr x1, [x0, #33*8]
    // And the value of x0.
    ldr x0, [x0]

    br  x1
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
     * possibly null object to lock.
     *
     * Derived from arm32 code.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    cbz    w0, .Lslow_lock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_lock:
    ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
    ldaxr  w1, [x4]                   // acquire needed only in most common case
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cbnz   w3, .Lnot_unlocked         // already thin locked
    // unlocked case - x1: original lock word that's zero except for the read barrier bits.
    orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Lnot_unlocked:  // x1: original lock word
    lsr    w3, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w3, .Lslow_lock            // if either of the top two bits are set, go slow path
    eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w2, w2                     // zero top 16 bits
    cbnz   w2, .Lslow_lock            // thread ids don't match -> contention, go to slow path
                                      // else this is a recursive lock of our own thin lock
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
    add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
    cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
    add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Llock_stxr_fail:
    b      .Lretry_lock               // retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * x0 holds the possibly null object to unlock.
     *
     * Derived from arm32 code.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    x0, .Lslow_unlock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    w1, [x4]
#else
    ldxr   w1, [x4]                   // Need to use atomic instructions for read barrier
#endif
    lsr    w2, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
    ldr    w2, [xSELF, #THREAD_ID_OFFSET]
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w3, w3                     // zero top 16 bits
    cbnz   w3, .Lslow_unlock          // do lock word and self thread id's match?
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    // transition to unlocked
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved read barrier bits
#ifndef USE_READ_BARRIER
    stlr   w3, [x4]
#else
    stlxr  w2, w3, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lrecursive_thin_unlock:  // w1: original lock word
    sub    w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
#ifndef USE_READ_BARRIER
    str    w1, [x4]
#else
    stxr   w2, w1, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lunlock_stxr_fail:
    b      .Lretry_unlock             // retry
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Store arguments and link register
    // Stack needs to be 16B aligned on calls.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
    SAVE_REG xLR, 24

    // Call runtime code
    bl artInstanceOfFromCode

    // Check for exception
    cbz x0, .Lthrow_class_cast_exception

    // Restore and return
    .cfi_remember_state
    RESTORE_REG xLR, 24
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
    ret
    .cfi_restore_state                // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset 32            // workaround for clang bug: 31975598

.Lthrow_class_cast_exception:
    // Restore
    RESTORE_REG xLR, 24
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
    brk 0                             // We should not return here...
END art_quick_check_instance_of

// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
.macro POP_REG_NE xReg, offset, xExclude
    .ifnc \xReg, \xExclude
        ldr \xReg, [sp, #\offset]     // restore xReg
        .cfi_restore \xReg
    .endif
.endm

// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
    .ifc \xReg1, \xExclude
        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
    .else
        .ifc \xReg2, \xExclude
            ldr \xReg1, [sp, #\offset]          // restore xReg1
        .else
            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
        .endif
    .endif
    .cfi_restore \xReg1
    .cfi_restore \xReg2
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * xDest, wDest and xObj are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle
     * name mismatch between instructions. This macro uses the lower 32b of register when possible.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
#ifdef USE_READ_BARRIER
# ifdef USE_BAKER_READ_BARRIER
    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
    // False dependency to avoid needing load/load fence.
    add \xObj, \xObj, \xTemp, lsr #32
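    // (\xTemp was zero-extended by the 32-bit lock word load, so \xTemp lsr #32
    // is always 0: the add leaves \xObj unchanged but makes the reference load
    // below carry an address dependency on the lock word load.)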
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
    b .Lrb_exit\number
# endif  // USE_BAKER_READ_BARRIER
.Lrb_slowpath\number:
    // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, xLR, 32

    // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
    .ifnc \xObj, x1
        mov x1, \xObj               // pass xObj
    .endif
    mov w2, #\offset                // pass offset
    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \wDest, w0
        mov \wDest, w0              // save return value in wDest
    .endif

    // Conditionally restore saved registers
    POP_REG_NE x0, 0, \xDest
    POP_REG_NE x1, 8, \xDest
    POP_REG_NE x2, 16, \xDest
    POP_REG_NE x3, 24, \xDest
    POP_REG_NE x4, 32, \xDest
    RESTORE_REG xLR, 40
    DECREASE_FRAME 48
.Lrb_exit\number:
#else
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
#endif  // USE_READ_BARRIER
.endm

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
ENTRY art_quick_aput_obj
    cbz x2, .Ldo_aput_null
    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
                                                                    // This also zero-extends to x3
    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
                                                                    // This also zero-extends to x3
    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
                                                                    // This also zero-extends to x4
    cmp w3, w4  // value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
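    // Mark the GC card for the stored-into array: write the low byte of the
    // card table base to card_table + (array address >> CARD_TABLE_CARD_SHIFT).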
1458    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
1459    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
1460    strb w3, [x3, x0]
1461    ret
1462.Ldo_aput_null:
1463    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
1464                                                         // "Compress" = do nothing
1465    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
1466    ret
1467.Lcheck_assignability:
1468    // Store arguments and link register
1469    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
1470    SAVE_TWO_REGS x2, xLR, 16
1471
1472    // Call runtime code
1473    mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
1474    mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
1475    bl artIsAssignableFromCode
1476
1477    // Check for exception
1478    cbz x0, .Lthrow_array_store_exception
1479
1480    // Restore
1481    .cfi_remember_state
1482    RESTORE_TWO_REGS x2, xLR, 16
1483    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
1484
1485    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
1486                                                          // "Compress" = do nothing
1487    POISON_HEAP_REF w2
1488    str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
1489    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
1490    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
1491    strb w3, [x3, x0]
1492    ret
1493    .cfi_restore_state            // Reset unwind info so following code unwinds.
1494    .cfi_def_cfa_offset 32        // workaround for clang bug: 31975598
1495.Lthrow_array_store_exception:
1496    RESTORE_TWO_REGS x2, xLR, 16
1497    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
1498
1499    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
1500    mov x1, x2                      // Pass value.
1501    mov x2, xSELF                   // Pass Thread::Current.
1502    bl artThrowArrayStoreException  // (Object*, Object*, Thread*).
1503    brk 0                           // Unreached.
1504END art_quick_aput_obj
1505
1506// Macro to facilitate adding new allocation entrypoints.
1507.macro ONE_ARG_DOWNCALL name, entrypoint, return
1508    .extern \entrypoint
1509ENTRY \name
1510    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1511    mov    x1, xSELF                  // pass Thread::Current
1512    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
1513    RESTORE_SAVE_REFS_ONLY_FRAME
1514    REFRESH_MARKING_REGISTER
1515    \return
1516END \name
1517.endm
1518
1519// Macro to facilitate adding new allocation entrypoints.
1520.macro TWO_ARG_DOWNCALL name, entrypoint, return
1521    .extern \entrypoint
1522ENTRY \name
1523    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1524    mov    x2, xSELF                  // pass Thread::Current
1525    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x3, xSELF                  // pass Thread::Current
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x4, xSELF                  // pass Thread::Current
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macros exploiting the code similarities between the reference downcalls.
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     \entrypoint                // (uint32_t type_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x2, xSELF                  // pass Thread::Current
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x3, xSELF                  // pass Thread::Current
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro for string and type resolution and initialization.
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset       // save everything for stack crawl
    mov   x1, xSELF                   // pass Thread::Current
    bl    \entrypoint                 // (int32_t index, Thread* self)
    cbz   w0, 1f                      // If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    REFRESH_MARKING_REGISTER
    ret                        // return
    .cfi_restore_state
    .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING  // workaround for clang bug: 31975598
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    cbz w0, 1f                 // result zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm
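
// Equivalent C sketch of the macro above (illustrative only):
//   if (result == 0) { DeliverPendingException(); }  // does not return
//   return result;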

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code when static storage is uninitialized. This stub runs the class
     * initializer and delivers the exception on error; on success the static storage base is
     * returned.
     */
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined by macros in runtime/entrypoints/quick/quick_field_entrypoints.cc.

ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER

THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm64 specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // x0: type, xSELF(x19): Thread::Current
    // x1-x7: free.
    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
                                                              // allocation stack has room.
                                                              // ldp won't work due to large offset.
    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
    cmp    x3, x4
    bhs    .Lslow_path\c_name
    ldr    w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
                                                              // local allocation. Also does the
                                                              // finalizable and initialization
                                                              // checks.
    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhs    .Lslow_path\c_name
                                                              // Compute the rosalloc bracket index
                                                              // from the size. Since the size is
                                                              // already aligned we can combine the
                                                              // two shifts together.
    add    x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
                                                              // Subtract pointer size since there
                                                              // are no runs for 0 byte allocations
                                                              // and the size is already aligned.
    ldr    x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
                                                              // Load the free list head (x3). This
                                                              // will be the return val.
    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz    x3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
                                                              // and update the list head with the
                                                              // next pointer.
    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                                              // Store the class pointer in the
                                                              // header. This also overwrites the
                                                              // next pointer. The offsets are
                                                              // asserted to match.

#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF w0
    str    w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                              // Push the new object onto the thread
                                                              // local allocation stack and
                                                              // increment the thread local
                                                              // allocation stack top.
    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                                              // Decrement the size of the free list

    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (e.g. Heap::VisitObjects) point of view.
    // It is not visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the allocation stack slot. It can be a pointer to one of:
    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    //       (The stack's initial state is all null pointers.)
    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // Case 1) is an object that is only temporarily invalid and will eventually become valid.
    // The internal runtime code simply checks whether the object is null or only partially
    // valid and, if so, ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    // "next" pointer is not cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
    sub    x1, x1, #1
                                                              // TODO: consider combining this store
                                                              // and the list head store above using
                                                              // stp.
    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]

    mov    x0, x3                                             // Set the return value and return.
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb    ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
    mov    x1, xSELF                                // pass Thread::Current
    bl     \cxx_name
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \c_name
.endm
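
// Rough C sketch of the fast path above (illustrative only; field names are
// shorthand for the offsets actually used):
//   if (self->alloc_stack_top >= self->alloc_stack_end) goto slow_path;
//   size_t size = klass->object_size_alloc_fast_path;   // huge if not initialized
//   if (size >= kRosAllocMaxThreadLocalBracketSize) goto slow_path;
//   Run* run = self->rosalloc_runs[(size >> kQuantumShift) - 1];
//   Slot* slot = run->free_list.head;
//   if (slot == nullptr) goto slow_path;
//   run->free_list.head = slot->next;
//   slot->klass = klass;                                // overwrites slot->next
//   *self->alloc_stack_top++ = slot;
//   run->free_list.size--;
//   return slot;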

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized
    ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
    ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
    ldr    w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
    add    x6, x4, x7                                         // Add object size to tlab pos.
    cmp    x6, x5                                             // Check if it fits, overflow works
                                                              // since the tlab pos and end are 32
                                                              // bit values.

    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhi    \slowPathLabel
    str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
    add    x5, x5, #1
    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF w0
    str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
                                                              // Fence. This is "ish" not "ishst" so
                                                              // that the code after this allocation
                                                              // site will see the right values in
                                                              // the fields of the class.
    mov    x0, x4
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb    ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    ret
.endm
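
// Rough C sketch of the TLAB bump-pointer fast path above (illustrative only):
//   size_t size = klass->object_size_alloc_fast_path;  // huge if not initialized
//   if (self->tlab_pos + size > self->tlab_end) goto slow_path;
//   Object* obj = (Object*)self->tlab_pos;
//   self->tlab_pos += size;
//   self->tlab_objects++;
//   obj->klass = klass;
//   return obj;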

// The common code for art_quick_alloc_object_*_tlab and art_quick_alloc_object_*_region_tlab.
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path TLAB / region TLAB allocation.
    // x0: type, xSELF(x19): Thread::Current
    // x1-x7: free.
    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
    mov    x1, xSELF                           // Pass Thread::Current.
    bl     \entrypoint                         // (mirror::Class*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1

.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
                                                              // (addr + 7) & ~7. The mask must
                                                              // be 64 bits to keep high bits in
                                                              // case of overflow.
    // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
    // Negative ints become large 64 bit unsigned ints which will always be larger than max signed
    // 32 bit int. Since the max shift for arrays is 3, it cannot become a negative 64 bit int.
    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
    bhs    \slowPathLabel                                     // path.

    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
                                                              // we use (end - begin) to handle
                                                              // negative size arrays. It is
                                                              // assumed that a negative size will
                                                              // always be greater unsigned than
                                                              // region size.
    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
    sub    \xTemp2, \xTemp2, \xTemp0
    cmp    \xTemp1, \xTemp2

    // The array class is always initialized here. Unlike new-instance,
    // this does not act as a double test.
    bhi    \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
                                                              // Move old thread_local_pos to x0
                                                              // for the return value.
    mov    x0, \xTemp0
    add    \xTemp0, \xTemp0, \xTemp1
    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
    add    \xTemp0, \xTemp0, #1
    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF \wClass
    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
                                                              // Fence.
// new-array is special. The class is loaded and immediately goes to the Initialized state
// before it is published. Therefore the only fence needed is for the publication of the object.
// See ClassLinker::CreateArrayClass() for more details.

// For publication of the new array, we don't need a 'dmb ishst' here.
// The compiler generates 'dmb ishst' for all new-array insts.
    ret
.endm
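
// Rough C sketch of the array fast path above (illustrative only); `size` is
// ((count << shift) + data_offset + 7) & ~7, produced by the size_setup macro
// plus the alignment mask applied at the top of this macro:
//   if (size >= kMinLargeObjectThreshold) goto slow_path;  // also catches count < 0
//   if (size > (size_t)(self->tlab_end - self->tlab_pos)) goto slow_path;
//   Array* arr = (Array*)self->tlab_pos;
//   self->tlab_pos += size;
//   self->tlab_objects++;
//   arr->klass = klass;
//   arr->length = count;
//   return arr;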

.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
    // Fast path array allocation for TLAB / region TLAB allocation.
    // x0: mirror::Class* type
    // x1: int32_t component_count
    // x2-x7: free.
    mov    x3, x0
    \size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
.Lslow_path\name:
    // x0: mirror::Class* klass
    // x1: int32_t component_count
    // x2: Thread* self
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
    mov    x2, xSELF                  // pass Thread::Current
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

.macro COMPUTE_ARRAY_SIZE_UNKNOWN xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    // Array classes are never finalizable or uninitialized, no need to check.
    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
    UNPOISON_HEAP_REF \wTemp0
    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
                                                              // bits.
                                                              // xCount is holding a 32 bit value,
                                                              // it cannot overflow.
    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif

    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
                                                              // component size shift is 3
                                                              // (for 64 bit alignment).
    and    \xTemp0, \xTemp0, #4
    add    \xTemp1, \xTemp1, \xTemp0
.endm
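
// Worked example for the "+4" trick above (illustrative only): for a long[]
// (component size shift == 3), xTemp0 becomes (3 + 1) & 4 == 4, so 4 extra bytes
// are added and the data starts at MIRROR_LONG_ARRAY_DATA_OFFSET. For shifts
// 0-2, (shift + 1) & 4 == 0 and nothing is added.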

.macro COMPUTE_ARRAY_SIZE_8 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    // Add array data offset and alignment.
    add    \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_16 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl    \xTemp1, \xCount, #1
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_32 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl    \xTemp1, \xCount, #2
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_64 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    lsl    \xTemp1, \xCount, #3
    // Add array data offset and alignment.
    add    \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
// the entrypoint once all backends have been updated to use the size variants.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

    /*
     * Called by managed code when the thread has been asked to suspend.
     */
    .extern artTestSuspendFromCode
ENTRY art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save callee saves for stack crawl
    mov    x0, xSELF
    bl     artTestSuspendFromCode             // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    ret
END art_quick_test_suspend

ENTRY art_quick_implicit_suspend
    mov    x0, xSELF
    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
    bl     artTestSuspendFromCode             // (Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    ret
END art_quick_implicit_suspend

    /*
     * Called by managed code that is attempting to call a method on a proxy class. On entry
     * x0 holds the proxy method and x1 holds the receiver. The frame size of the invoked proxy
     * method agrees with a ref and args callee save frame.
     */
    .extern artQuickProxyInvokeHandler
ENTRY art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    mov     x2, xSELF                   // pass Thread::Current
    mov     x3, sp                      // pass SP
    bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
    cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
    REFRESH_MARKING_REGISTER
    fmov    d0, x0                      // Store result in d0 in case it was float or double
    ret                                 // return on success
.Lexception_in_proxy:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_proxy_invoke_handler

    /*
     * Called to resolve an imt conflict.
     * x0 is the conflict ArtMethod.
     * xIP1 is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to xIP0, xIP1, x13-x15, and x0.
     */
    .extern artLookupResolvedMethod
ENTRY art_quick_imt_conflict_trampoline
    ldr xIP0, [sp, #0]  // Load referrer
    ubfx x15, xIP1, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
    ldr xIP0, [xIP0, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_64]   // Load dex cache methods array
    add xIP0, xIP0, x15, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.

    // Relaxed atomic load x14:x15 from the dex cache slot.
.Limt_conflict_trampoline_retry_load:
    ldxp x14, x15, [xIP0]
    stxp w13, x14, x15, [xIP0]
    cbnz w13, .Limt_conflict_trampoline_retry_load
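    // Note (illustrative): a plain LDP is not guaranteed to be single-copy atomic
    // for the full 128 bits, so the LDXP/STXP pair above retries until the
    // exclusive store of the just-read value succeeds, yielding an atomic
    // snapshot of the {method, index} pair.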

    cmp x15, xIP1       // Compare method index to see if we had a DexCache method hit.
    bne .Limt_conflict_trampoline_dex_cache_miss
.Limt_conflict_trampoline_have_interface_method:
    ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
    ldr x0, [xIP1]  // Load first entry in ImtConflictTable.
.Limt_table_iterate:
    cmp x0, x14
    // Branch if found. Benchmarks have shown doing a branch here is better.
    beq .Limt_table_found
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cbz x0, .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
    b .Limt_table_iterate
.Limt_table_found:
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    ldr x0, [xIP1, #__SIZEOF_POINTER__]
    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    br xIP0
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    mov x0, x14  // Load interface method
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
.Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here,
    // so artLookupResolvedMethod() is not allowed to walk the stack.

    // Save GPR args and return address, allocate space for FPR args, align stack.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, x5, 32
    SAVE_TWO_REGS x6, x7, 48
    SAVE_REG      xLR, (8 * 8 + 8 * 8 + 8)

    // Save FPR args.
    stp d0, d1, [sp, #64]
    stp d2, d3, [sp, #80]
    stp d4, d5, [sp, #96]
    stp d6, d7, [sp, #112]

    mov x0, xIP1                            // Pass method index.
    ldr x1, [sp, #(8 * 8 + 8 * 8 + 8 + 8)]  // Pass referrer.
    bl artLookupResolvedMethod              // (uint32_t method_index, ArtMethod* referrer)
    mov x14, x0   // Move the interface method to x14 where the loop above expects it.

    // Restore FPR args.
    ldp d0, d1, [sp, #64]
    ldp d2, d3, [sp, #80]
    ldp d4, d5, [sp, #96]
    ldp d6, d7, [sp, #112]

    // Restore GPR args and return address.
    RESTORE_REG      xLR, (8 * 8 + 8 * 8 + 8)
    RESTORE_TWO_REGS x2, x3, 16
    RESTORE_TWO_REGS x4, x5, 32
    RESTORE_TWO_REGS x6, x7, 48
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)

    // If the method wasn't resolved, skip the lookup and go to artInvokeInterfaceTrampoline().
    cbz x14, .Lconflict_trampoline
    b .Limt_conflict_trampoline_have_interface_method
END art_quick_imt_conflict_trampoline

ENTRY art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    mov x2, xSELF
    mov x3, sp
    bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
    cbz x0, 1f
    mov xIP0, x0            // Remember returned code pointer in xIP0.
    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    br xIP0
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline

/*
 * Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------#    <--- SP on entry
 * | Return X30/LR     |
 * | X29/FP            |    callee save
 * | X28               |    callee save
 * | X27               |    callee save
 * | X26               |    callee save
 * | X25               |    callee save
 * | X24               |    callee save
 * | X23               |    callee save
 * | X22               |    callee save
 * | X21               |    callee save
 * | X20               |    callee save
 * | X19               |    callee save
 * | X7                |    arg7
 * | X6                |    arg6
 * | X5                |    arg5
 * | X4                |    arg4
 * | X3                |    arg3
 * | X2                |    arg2
 * | X1                |    arg1
 * | D7                |    float arg 8
 * | D6                |    float arg 7
 * | D5                |    float arg 6
 * | D4                |    float arg 5
 * | D3                |    float arg 4
 * | D2                |    float arg 3
 * | D1                |    float arg 2
 * | D0                |    float arg 1
 * | Method*           | <- X0
 * #-------------------#
 * | local ref cookie  | // 4B
 * | handle scope size | // 4B
 * #-------------------#
 * | JNI Call Stack    |
 * #-------------------#    <--- SP on native call
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 * | Native code ptr   |
 * #-------------------#
 * | Free scratch      |
 * #-------------------#
 * | Ptr to (1)        |    <--- SP
 * #-------------------#
 */
    /*
     * Called to do a generic JNI down-call.
     */
ENTRY art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0

    // Save SP so that we can have static CFI info.
    mov x28, sp
    .cfi_def_cfa_register x28

    // This looks the same, but is different: this will be updated to point to the bottom
    // of the frame when the handle scope is inserted.
    mov xFP, sp

    mov xIP0, #5120
    sub sp, sp, xIP0

    // prepare for artQuickGenericJniTrampoline call
    // (Thread*,  SP)
    //    x0      x1   <= C calling convention
    //   xSELF    xFP  <= where they are

    mov x0, xSELF   // Thread*
    mov x1, xFP
    bl artQuickGenericJniTrampoline  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // x0: pointer to native code, 0 on error.
    // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    cbz x0, .Lexception_in_native

    // Release part of the alloca.
    mov sp, x1

    // Save the code pointer
    mov xIP0, x0

    // Load parameters from frame into registers.
    // TODO Check with artQuickGenericJniTrampoline.
    //      Also, check again AAPCS64 - the stack arguments are interleaved.
    ldp x0, x1, [sp]
    ldp x2, x3, [sp, #16]
    ldp x4, x5, [sp, #32]
    ldp x6, x7, [sp, #48]

    ldp d0, d1, [sp, #64]
    ldp d2, d3, [sp, #80]
    ldp d4, d5, [sp, #96]
    ldp d6, d7, [sp, #112]

    add sp, sp, #128

    blr xIP0        // native call.

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*, result, result_f)
    //    x0       x1       x2        <= C calling convention
    mov x1, x0      // Result (from saved).
    mov x0, xSELF   // Thread register.
    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2

    bl artQuickGenericJniEndTrampoline

    // Pending exceptions possible.
    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
    cbnz x2, .Lexception_in_native

    // Tear down the alloca.
    mov sp, x28
    .cfi_def_cfa_register sp

    // Tear down the callee-save frame.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER

    // Store into FPR, for when it's an FPR return.
    fmov d0, x0
    ret

.Lexception_in_native:
    // Move to x1 then sp to please assembler.
    ldr x1, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
    mov sp, x1
    .cfi_def_cfa_register sp
    // This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END art_quick_generic_jni_trampoline
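
// Call flow sketch for the trampoline above (illustrative only):
//   1. artQuickGenericJniTrampoline(self, frame) builds the handle scope and the
//      native call stack, returning {native code ptr, new sp}.
//   2. The stub pops the first 8 GPR / 8 FPR arguments and calls the native code.
//   3. artQuickGenericJniEndTrampoline(self, result, result_f) finishes the JNI
//      call (e.g. decoding a returned reference) before the stub returns.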

/*
 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
 * of a quick call:
 * x0 = method being called/to bridge to.
 * x1..x7, d0..d7 = arguments to that method.
 */
ENTRY art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.

    //  x0 will contain mirror::ArtMethod* method.
    mov x1, xSELF                          // Pass Thread::Current().
    mov x2, sp

    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
    //                                      mirror::ArtMethod** sp)
    bl   artQuickToInterpreterBridge

    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
    REFRESH_MARKING_REGISTER

    fmov d0, x0

    RETURN_OR_DELIVER_PENDING_EXCEPTION
END art_quick_to_interpreter_bridge

/*
 * Called to attempt to execute an obsolete method.
 */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod


//
// Instrumentation-related stubs
//
    .extern artInstrumentationMethodEntryFromCode
ENTRY art_quick_instrumentation_entry
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    mov   x20, x0             // Preserve method reference in a callee-save.

    mov   x2, xSELF
    mov   x3, sp  // Pass SP
    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP)

    mov   xIP0, x0            // x0 = result of call.
    mov   x0, x20             // Reload method reference.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
    REFRESH_MARKING_REGISTER
    cbz   xIP0, 1f            // Deliver the pending exception if method is null.
    adr   xLR, art_quick_instrumentation_exit
    br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.

1:
    DELIVER_PENDING_EXCEPTION
END art_quick_instrumentation_entry

    .extern artInstrumentationMethodExitFromCode
ENTRY art_quick_instrumentation_exit
    mov   xLR, #0             // Clobber LR for later checks.

    SETUP_SAVE_REFS_ONLY_FRAME

    str x0, [sp, #-16]!       // Save integer result.
    .cfi_adjust_cfa_offset 16
    str d0, [sp, #8]          // Save floating-point result.

    add   x3, sp, #8          // Pass floating-point result pointer.
    mov   x2, sp              // Pass integer result pointer.
    add   x1, sp, #16         // Pass SP.
    mov   x0, xSELF           // Pass Thread.
    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res*, fpr_res*)

    mov   xIP0, x0            // Return address from instrumentation call.
    mov   xLR, x1             // x1 holds the link register if we're to bounce to deoptimize.

    ldr   d0, [sp, #8]        // Restore floating-point result.
    ldr   x0, [sp], #16       // Restore integer result, and drop stack area.
    .cfi_adjust_cfa_offset -16

    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbz   xIP0, 1f            // Handle error
    br    xIP0                // Tail-call out.
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_instrumentation_exit

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimize
ENTRY art_quick_deoptimize
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mov    x0, xSELF          // Pass thread.
    bl     artDeoptimize      // (Thread*)
    brk 0
END art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
    mov    x1, xSELF                      // Pass thread.
    bl     artDeoptimizeFromCompiledCode  // (DeoptimizationKind, Thread*)
    brk 0
END art_quick_deoptimize_from_compiled_code


    /*
     * String's indexOf.
     *
     * TODO: Not very optimized.
     * On entry:
     *    x0:   string object (known non-null)
     *    w1:   char to match (known <= 0xFFFF)
     *    w2:   Starting offset in string data
     */
ENTRY art_quick_indexof
#if (STRING_COMPRESSION_FEATURE)
    ldr   w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
#else
    ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
#endif
    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
    /* w4 holds count (with flag) and w3 holds actual length */
    lsr   w3, w4, #1
#endif
    /* Clamp start to [0..count] */
    cmp   w2, #0
    csel  w2, wzr, w2, lt
    cmp   w2, w3
    csel  w2, w3, w2, gt

    /* Save a copy to compute result */
    mov   x5, x0

#if (STRING_COMPRESSION_FEATURE)
    tbz   w4, #0, .Lstring_indexof_compressed
#endif
    /* Build pointer to start of data to compare and pre-bias */
    add   x0, x0, x2, lsl #1
    sub   x0, x0, #2
    /* Compute iteration count */
    sub   w2, w3, w2

    /*
     * At this point we have:
     *  x0: start of the data to test
     *  w1: char to compare
     *  w2: iteration count
     *  x5: original start of string data
     */

    subs  w2, w2, #4
    b.lt  .Lindexof_remainder

.Lindexof_loop4:
    ldrh  w6, [x0, #2]!
    ldrh  w7, [x0, #2]!
    ldrh  wIP0, [x0, #2]!
    ldrh  wIP1, [x0, #2]!
    cmp   w6, w1
    b.eq  .Lmatch_0
    cmp   w7, w1
    b.eq  .Lmatch_1
    cmp   wIP0, w1
    b.eq  .Lmatch_2
    cmp   wIP1, w1
    b.eq  .Lmatch_3
    subs  w2, w2, #4
    b.ge  .Lindexof_loop4

.Lindexof_remainder:
    adds  w2, w2, #4
    b.eq  .Lindexof_nomatch

.Lindexof_loop1:
    ldrh  w6, [x0, #2]!
    cmp   w6, w1
    b.eq  .Lmatch_3
    subs  w2, w2, #1
    b.ne  .Lindexof_loop1

.Lindexof_nomatch:
    mov   x0, #-1
    ret

.Lmatch_0:
    sub   x0, x0, #6
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
.Lmatch_1:
    sub   x0, x0, #4
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
.Lmatch_2:
    sub   x0, x0, #2
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
.Lmatch_3:
    sub   x0, x0, x5
    asr   x0, x0, #1
    ret
#if (STRING_COMPRESSION_FEATURE)
   /*
    * Compare the compressed string character-by-character with
    * the input character.
    */
.Lstring_indexof_compressed:
    add   x0, x0, x2
    sub   x0, x0, #1
    sub   w2, w3, w2
.Lstring_indexof_compressed_loop:
    subs  w2, w2, #1
    b.lt  .Lindexof_nomatch
    ldrb  w6, [x0, #1]!
    cmp   w6, w1
    b.eq  .Lstring_indexof_compressed_matched
    b     .Lstring_indexof_compressed_loop
.Lstring_indexof_compressed_matched:
    sub   x0, x0, x5
    ret
#endif
END art_quick_indexof
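
// Reference C sketch of the search above (illustrative only):
//   for (int i = start; i < count; ++i) {
//     if (data[i] == ch) return i;
//   }
//   return -1;
// The uncompressed loop is unrolled 4x; the match labels subtract the
// pre-increment bias before converting the byte offset back to a char index.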

    /*
     * Create a function `name` calling the ReadBarrier::Mark routine,
     * getting its argument and returning its result through W register
     * `wreg` (corresponding to X register `xreg`), saving and restoring
     * all caller-save registers.
     *
     * If `wreg` is different from `w0`, the generated function follows a
     * non-standard runtime calling convention:
     * - register `wreg` is used to pass the (sole) argument of this
     *   function (instead of W0);
     * - register `wreg` is used to return the result of this function
     *   (instead of W0);
     * - W0 is treated like a normal (non-argument) caller-save register;
     * - everything else is the same as in the standard runtime calling
     *   convention (e.g. standard callee-save registers are preserved).
     */
.macro READ_BARRIER_MARK_REG name, wreg, xreg
ENTRY \name
    // Reference is null, no work to do at all.
    cbz \wreg, .Lret_rb_\name
    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
.Lret_rb_\name:
    ret
.Lnot_marked_rb_\name:
    // Check if the top two bits are both one; if so, the lock word holds a forwarding address.
    tst   wIP0, wIP0, lsl #1
    bmi   .Lret_forwarding_address\name
.Lslow_rb_\name:
    /*
     * Allocate 44 stack slots * 8 = 352 bytes:
     * - 20 slots for core registers X0-15, X17-X19, LR
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
    // We must not clobber IP1 since code emitted for HLoadClass and HLoadString
    // relies on IP1 being preserved.
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
    SAVE_TWO_REGS  x2,  x3, 16
    SAVE_TWO_REGS  x4,  x5, 32
    SAVE_TWO_REGS  x6,  x7, 48
    SAVE_TWO_REGS  x8,  x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    SAVE_TWO_REGS x17, x18, 128  // Skip x16, i.e. IP0.
    SAVE_TWO_REGS x19, xLR, 144  // Save also return address.
    // Save all potentially live caller-save floating-point registers.
    stp   d0, d1,   [sp, #160]
    stp   d2, d3,   [sp, #176]
    stp   d4, d5,   [sp, #192]
    stp   d6, d7,   [sp, #208]
    stp   d16, d17, [sp, #224]
    stp   d18, d19, [sp, #240]
    stp   d20, d21, [sp, #256]
    stp   d22, d23, [sp, #272]
    stp   d24, d25, [sp, #288]
    stp   d26, d27, [sp, #304]
    stp   d28, d29, [sp, #320]
    stp   d30, d31, [sp, #336]

    .ifnc \wreg, w0
      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
    .endif
    bl    artReadBarrierMark            // artReadBarrierMark(obj)
    .ifnc \wreg, w0
      mov   \wreg, w0                   // Return result into `wreg`
    .endif

    // Restore core regs, except `xreg`, as `wreg` is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REGS_NE x0, x1,   0,   \xreg
    POP_REGS_NE x2, x3,   16,  \xreg
    POP_REGS_NE x4, x5,   32,  \xreg
    POP_REGS_NE x6, x7,   48,  \xreg
    POP_REGS_NE x8, x9,   64,  \xreg
    POP_REGS_NE x10, x11, 80,  \xreg
    POP_REGS_NE x12, x13, 96,  \xreg
    POP_REGS_NE x14, x15, 112, \xreg
    POP_REGS_NE x17, x18, 128, \xreg
    POP_REGS_NE x19, xLR, 144, \xreg  // Restore also return address.
    // Restore floating-point registers.
    ldp   d0, d1,   [sp, #160]
    ldp   d2, d3,   [sp, #176]
    ldp   d4, d5,   [sp, #192]
    ldp   d6, d7,   [sp, #208]
    ldp   d16, d17, [sp, #224]
    ldp   d18, d19, [sp, #240]
    ldp   d20, d21, [sp, #256]
    ldp   d22, d23, [sp, #272]
    ldp   d24, d25, [sp, #288]
    ldp   d26, d27, [sp, #304]
    ldp   d28, d29, [sp, #320]
    ldp   d30, d31, [sp, #336]
    // Remove frame and return.
    DECREASE_FRAME 352
    ret
.Lret_forwarding_address\name:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl   \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    ret
END \name
.endm
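
// Rough C sketch of each generated function (illustrative only):
//   obj = ...;                                   // in `wreg`
//   if (obj == null) return obj;
//   LockWord lw = obj->lock_word;
//   if (lw.mark_bit) return obj;                 // already marked
//   if (lw.top_two_bits_set) return lw << kForwardingAddressShift;
//   return artReadBarrierMark(obj);              // slow path, caller-saves preserved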

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 omitted: IP0 is blocked.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29


.macro SELECT_X_OR_W_FOR_MACRO macro_to_use, x, w, xreg
    .if \xreg
      \macro_to_use \x
    .else
      \macro_to_use \w
    .endif
.endm
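
// For example (illustrative only, SOME_MACRO is hypothetical):
// `SELECT_X_OR_W_FOR_MACRO SOME_MACRO, x5, w5, 1` expands to `SOME_MACRO x5`,
// while passing 0 for the last argument expands to `SOME_MACRO w5`.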

.macro FOR_REGISTERS macro_for_register, macro_for_reserved_register, xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x0, w0, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x1, w1, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x2, w2, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x3, w3, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x4, w4, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x5, w5, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x6, w6, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x7, w7, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x8, w8, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x9, w9, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x10, w10, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x11, w11, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x12, w12, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x13, w13, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x14, w14, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x15, w15, \xreg
    \macro_for_reserved_register  // IP0 is reserved
    \macro_for_reserved_register  // IP1 is reserved
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x18, w18, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x19, w19, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x20, w20, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x21, w21, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x22, w22, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x23, w23, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x24, w24, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x25, w25, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x26, w26, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x27, w27, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x28, w28, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x29, w29, \xreg
    \macro_for_reserved_register  // lr is reserved
    \macro_for_reserved_register  // sp is reserved
.endm

.macro FOR_XREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 1
.endm

.macro FOR_WREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 0
.endm

.macro BRK0_BRK0
    brk 0
    brk 0
.endm
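    // Each switch case below is exactly two instructions (8 bytes). BRK0_BRK0
    // fills the slots of reserved registers with the same 8 bytes of trapping
    // code, keeping the 32-entry tables uniformly sized and indexable.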

#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
.macro INTROSPECTION_ARRAY_LOAD index_reg
    ldr   wIP0, [xIP0, \index_reg, lsl #2]
    b     art_quick_read_barrier_mark_introspection
.endm
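    // Each expansion loads the 32-bit compressed reference at
    // [base + index * 4] (the thunk has already moved the base register to
    // IP0) and tail-calls the main entrypoint, which marks the reference
    // and returns it through the return switch.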

.macro MOV_WIP0_TO_WREG_AND_BL_LR reg
    mov   \reg, wIP0
    br    lr  // Do not use RET as we do not enter the entrypoint with "BL".
.endm

.macro READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH ldr_offset
    /*
     * Allocate 44 stack slots * 8 = 352 bytes:
     * - 19 slots for core registers X0-X15, X18-X19, LR
     * - 1 slot padding
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
    SAVE_TWO_REGS  x2,  x3, 16
    SAVE_TWO_REGS  x4,  x5, 32
    SAVE_TWO_REGS  x6,  x7, 48
    SAVE_TWO_REGS  x8,  x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    SAVE_TWO_REGS x18, x19, 128       // Skip x16, x17, i.e. IP0, IP1.
    SAVE_REG      xLR,      144       // Save return address, skip padding at 152.
    // Save all potentially live caller-save floating-point registers.
    stp   d0, d1,   [sp, #160]
    stp   d2, d3,   [sp, #176]
    stp   d4, d5,   [sp, #192]
    stp   d6, d7,   [sp, #208]
    stp   d16, d17, [sp, #224]
    stp   d18, d19, [sp, #240]
    stp   d20, d21, [sp, #256]
    stp   d22, d23, [sp, #272]
    stp   d24, d25, [sp, #288]
    stp   d26, d27, [sp, #304]
    stp   d28, d29, [sp, #320]
    stp   d30, d31, [sp, #336]

    mov   x0, xIP0
    bl    artReadBarrierMark          // artReadBarrierMark(obj)
    mov   xIP0, x0

    // Restore core registers, except x0 and x1: the return switch case
    // address calculation below is smoother with an extra free register.
    RESTORE_TWO_REGS  x2,  x3, 16
    RESTORE_TWO_REGS  x4,  x5, 32
    RESTORE_TWO_REGS  x6,  x7, 48
    RESTORE_TWO_REGS  x8,  x9, 64
    RESTORE_TWO_REGS x10, x11, 80
    RESTORE_TWO_REGS x12, x13, 96
    RESTORE_TWO_REGS x14, x15, 112
    RESTORE_TWO_REGS x18, x19, 128    // Skip x16, x17, i.e. IP0, IP1.
    RESTORE_REG      xLR,      144    // Restore return address.
    // Restore caller-save floating-point registers.
    ldp   d0, d1,   [sp, #160]
    ldp   d2, d3,   [sp, #176]
    ldp   d4, d5,   [sp, #192]
    ldp   d6, d7,   [sp, #208]
    ldp   d16, d17, [sp, #224]
    ldp   d18, d19, [sp, #240]
    ldp   d20, d21, [sp, #256]
    ldp   d22, d23, [sp, #272]
    ldp   d24, d25, [sp, #288]
    ldp   d26, d27, [sp, #304]
    ldp   d28, d29, [sp, #320]
    ldp   d30, d31, [sp, #336]

    ldr   x0, [lr, #\ldr_offset]      // Load the instruction.
    adr   xIP1, .Lmark_introspection_return_switch
    bfi   xIP1, x0, #3, #5            // Calculate switch case address.
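    // The BFI above inserts bits 0-4 of the loaded LDR encoding (Rt, the
    // destination register) into bits 3-7 of the 256-byte-aligned switch
    // base, i.e. it computes switch_base + rt * 8, the 8-byte case that
    // moves wIP0 to the right register.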
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 352
    br    xIP1
.endm

    /*
     * Use introspection to load a reference from the same address as the LDR
     * instruction in generated code would load (unless loaded by the thunk,
     * see below), call ReadBarrier::Mark() with that reference if needed
     * and return it in the same register the LDR instruction would load into.
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads, the LDR instruction in generated code follows
     * the branch to the thunk, i.e. the LDR is at [LR, #-4], and the thunk
     * knows the holder and performs the gray bit check, returning to the LDR
     * instruction if the object is not gray, so this entrypoint no longer
     * needs to know anything about the holder. For GC root loads, the LDR
     * instruction in generated code precedes the branch to the thunk (i.e.
     * the LDR is at [LR, #-8]) and the thunk does not do the gray bit check.
     *
     * For field accesses and array loads with a constant index, the thunk loads
     * the reference into IP0 using introspection and calls the main entrypoint,
     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
     * the passed reference is poisoned.
     *
     * For array accesses with a non-constant index, the thunk inserts bits
     * 16-21 of the LDR instruction into the entrypoint address, effectively
     * calculating a switch case label based on the index register (bits 16-20)
     * and adding an extra offset (bit 21 is set) to differentiate from the
     * main entrypoint, then moves the base register to IP0 and jumps to the
     * switch case. Therefore we need to align the main entrypoint to 512 bytes,
     * accounting for a 256-byte offset followed by 32 array entrypoints
     * starting at art_quick_read_barrier_mark_introspection_arrays, each
     * containing an LDR (register) and a branch to the main entrypoint.
     *
     * For GC root accesses we cannot use the main entrypoint because of the
     * different offset where the LDR instruction in generated code is located.
     * (And even with heap poisoning enabled, GC roots are not poisoned.)
     * To reuse the same entrypoint pointer in generated code, we make sure
     * that the GC root entrypoint (a copy of the entrypoint with a different
     * offset for introspection loads) is located at a known offset (768 bytes,
     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
     * entrypoint, and the GC root thunk adjusts the entrypoint pointer, moves
     * the root register to IP0 and jumps to the customized entrypoint,
     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
     * performs all the fast-path checks, so we need just the slow path.
     *
     * The code structure is
     *   art_quick_read_barrier_mark_introspection:
     *     Up to 256 bytes for the main entrypoint code.
     *     Padding to 256 bytes if needed.
     *   art_quick_read_barrier_mark_introspection_arrays:
     *     Exactly 256 bytes for array load switch cases (32x2 instructions).
     *   .Lmark_introspection_return_switch:
     *     Exactly 256 bytes for return switch cases (32x2 instructions).
     *   art_quick_read_barrier_mark_introspection_gc_roots:
     *     GC root entrypoint code.
     */
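    /*
     * Worked example of the array thunk's address calculation (a sketch,
     * assuming the standard A64 LDR (register) encoding, where Rm occupies
     * bits 16-20 and bit 21 is always set): for `ldr w1, [x0, x5, lsl #2]`,
     * bits 16-21 are 0b100101. Shifted left by 3 (8 bytes per case) and
     * inserted into the 512-byte-aligned entrypoint address, this yields
     * entrypoint + 256 + 5 * 8, i.e. the x5 case in the array switch below.
     */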
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP0 contains the reference, IP1 can be freely used.
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF wIP0
    // If reference is null, just return it in the right register.
    cbz   wIP0, .Lmark_introspection_return
    // Use wIP1 as temp and check the mark bit of the reference.
    ldr   wIP1, [xIP0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz   wIP1, #LOCK_WORD_MARK_BIT_SHIFT, .Lmark_introspection_unmarked
.Lmark_introspection_return:
    // Without an extra register for the return switch case address calculation,
    // we exploit the high word of xIP0 to temporarily store ref_reg*8,
    // so the return switch below must move wIP0 instead of xIP0 to the register.
    ldr   wIP1, [lr, #BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET]  // Load the instruction.
    bfi   xIP0, xIP1, #(32 + 3), #5   // Extract ref_reg*8 to high word in xIP0.
    adr   xIP1, .Lmark_introspection_return_switch
    bfxil xIP1, xIP0, #32, #8         // Calculate return switch case address.
    br    xIP1
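    // To spell out the two bit-field ops above: BFI places Rt (bits 0-4 of
    // the LDR encoding) at bits 35-39 of xIP0, so bits 32-39 hold ref_reg*8
    // (at most 29*8 = 232, which fits in 8 bits). BFXIL then copies those
    // 8 bits into the low byte of the 256-byte-aligned switch base,
    // producing switch_base + ref_reg*8 without clobbering wIP0.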
.Lmark_introspection_unmarked:
    // Check if the top two bits are set; if so, this is a forwarding address.
    // (wIP1 & (wIP1 << 1)) has bit 31 set iff bits 31 and 30 of wIP1 are both set.
    tst   wIP1, wIP1, lsl #1
    bmi   .Lmark_introspection_forwarding_address
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET

.Lmark_introspection_forwarding_address:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl   wIP0, wIP1, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b     .Lmark_introspection_return

    // We're very close to the allotted 256B for the entrypoint code before the
    // array switch cases. Should we go a little bit over the limit, we can
    // move some code after the array switch cases and return switch cases.
    .balign 256
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    FOR_XREGISTERS INTROSPECTION_ARRAY_LOAD, BRK0_BRK0
.Lmark_introspection_return_switch:
    FOR_WREGISTERS MOV_WIP0_TO_WREG_AND_BL_LR, BRK0_BRK0
    .hidden art_quick_read_barrier_mark_introspection_gc_roots
    .global art_quick_read_barrier_mark_introspection_gc_roots
art_quick_read_barrier_mark_introspection_gc_roots:
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
END art_quick_read_barrier_mark_introspection

.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME                // Save callee saves in case allocation triggers GC.
    mov     x2, xSELF
    mov     x3, sp
    INCREASE_FRAME 16                             // Reserve space for JValue result.
    str     xzr, [sp, #0]                         // Initialize result to zero.
    mov     x0, sp                                // Set x0 to point to result.
    bl      artInvokePolymorphic                  // artInvokePolymorphic(result, receiver, thread, save_area)
    uxtb    w0, w0                                // Result is the return type descriptor as a char.
    sub     w0, w0, 'A'                           // Convert to zero-based index.
    cmp     w0, 'Z' - 'A'
    bhi     .Lcleanup_and_return                  // Clean up if out of bounds.
    adrp    x1, .Lhandler_table                   // Compute address of handler table.
    add     x1, x1, :lo12:.Lhandler_table
    ldrb    w0, [x1, w0, uxtw]                    // Look up handler offset in handler table.
    adr     x1, .Lstart_of_handlers
    add     x0, x1, w0, sxtb #2                   // Convert relative offset to absolute address.
    br      x0                                    // Branch to handler.
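    // For example, a return type descriptor of 'D' (double) yields index 3;
    // the table byte at index 3 is (.Lstore_double_result -
    // .Lstart_of_handlers) / 4, so the ADD above reconstructs the address of
    // .Lstore_double_result from the signed byte offset scaled back by 4.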

.Lstart_of_handlers:
.Lstore_boolean_result:
    ldrb    w0, [sp]
    b       .Lcleanup_and_return
.Lstore_char_result:
    ldrh    w0, [sp]
    b       .Lcleanup_and_return
.Lstore_float_result:
    ldr     s0, [sp]
    str     s0, [sp, #32]                         // Store to the frame's d0 slot; the restore below returns it in s0.
    b       .Lcleanup_and_return
.Lstore_double_result:
    ldr     d0, [sp]
    str     d0, [sp, #32]                         // Store to the frame's d0 slot; the restore below returns it in d0.
    b       .Lcleanup_and_return
.Lstore_long_result:
    ldr     x0, [sp]
    // Fall-through
.Lcleanup_and_return:
    DECREASE_FRAME 16
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

    .section    .rodata                           // Place handler table in read-only section away from text.
    .align  2
.macro HANDLER_TABLE_OFFSET handler_label
    .byte (\handler_label - .Lstart_of_handlers) / 4
.endm
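// Handlers consist of 4-byte A64 instructions, so the offset from
// .Lstart_of_handlers is always a multiple of 4; dividing by 4 lets each
// entry fit in one signed byte, and the dispatch above recovers the byte
// offset with the `sxtb #2` extended-register add.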
.Lhandler_table:
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // L (object - references are compressed and only 32 bits)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
    .text

END art_quick_invoke_polymorphic
