quick_entrypoints_arm64.S revision e26c6ff5c28f803406a5bcbda8ac27a4e718fde6
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm64.S"

#include "arch/quick_alloc_entrypoints.S"


.macro INCREASE_FRAME frame_adjustment
    sub sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset (\frame_adjustment)
.endm

.macro DECREASE_FRAME frame_adjustment
    add sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

.macro SAVE_REG reg, offset
    str \reg, [sp, #(\offset)]
    .cfi_rel_offset \reg, (\offset)
.endm

.macro RESTORE_REG reg, offset
    ldr \reg, [sp, #(\offset)]
    .cfi_restore \reg
.endm

.macro SAVE_REG_INCREASE_FRAME reg, frame_adjustment
    str \reg, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg, 0
.endm

.macro RESTORE_REG_DECREASE_FRAME reg, frame_adjustment
    ldr \reg, [sp], #(\frame_adjustment)
    .cfi_restore \reg
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

.macro SAVE_TWO_REGS reg1, reg2, offset
    stp \reg1, \reg2, [sp, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm

.macro RESTORE_TWO_REGS reg1, reg2, offset
    ldp \reg1, \reg2, [sp, #(\offset)]
    .cfi_restore \reg1
    .cfi_restore \reg2
.endm

.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg1, 0
    .cfi_rel_offset \reg2, 8
.endm

.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
    .cfi_restore \reg1
    .cfi_restore \reg2
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]

    INCREASE_FRAME 176

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP callee-saves.
    stp d8, d9,   [sp, #16]
    stp d10, d11, [sp, #32]
    stp d12, d13, [sp, #48]
    stp d14, d15, [sp, #64]

    // GP callee-saves
    SAVE_TWO_REGS x19, x20, 80
    SAVE_TWO_REGS x21, x22, 96
    SAVE_TWO_REGS x23, x24, 112
    SAVE_TWO_REGS x25, x26, 128
    SAVE_TWO_REGS x27, x28, 144
    SAVE_TWO_REGS x29, xLR, 160

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
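
// For reference, a sketch of the kSaveAllCalleeSaves frame laid out by the
// macro above (derived from the offsets it uses; 176 bytes total):
//   [sp, #0]   ArtMethod*  (Runtime::callee_save_methods_[kSaveAllCalleeSaves])
//   [sp, #8]   stack alignment filler
//   [sp, #16]  d8-d15   (FP callee-saves)
//   [sp, #80]  x19-x28  (GP callee-saves)
//   [sp, #160] x29, LR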

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]

    INCREASE_FRAME 96

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
#endif

    // GP callee-saves.
    // x20 paired with ArtMethod* - see below.
    SAVE_TWO_REGS x21, x22, 16
    SAVE_TWO_REGS x23, x24, 32
    SAVE_TWO_REGS x25, x26, 48
    SAVE_TWO_REGS x27, x28, 64
    SAVE_TWO_REGS x29, xLR, 80

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    stp xIP0, x20, [sp]
    .cfi_rel_offset x20, 8

    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_ONLY_FRAME
    // Callee-saves.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_REG x20, 8
    RESTORE_TWO_REGS x21, x22, 16
    RESTORE_TWO_REGS x23, x24, 32
    RESTORE_TWO_REGS x25, x26, 48
    RESTORE_TWO_REGS x27, x28, 64
    RESTORE_TWO_REGS x29, xLR, 80

    DECREASE_FRAME 96
.endm

.macro POP_SAVE_REFS_ONLY_FRAME
    DECREASE_FRAME 96
.endm


.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    INCREASE_FRAME 224

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP args.
    stp d0, d1, [sp, #16]
    stp d2, d3, [sp, #32]
    stp d4, d5, [sp, #48]
    stp d6, d7, [sp, #64]

    // Core args.
    SAVE_TWO_REGS x1, x2, 80
    SAVE_TWO_REGS x3, x4, 96
    SAVE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: We could avoid saving X20 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    SAVE_TWO_REGS x7, x20, 128
    SAVE_TWO_REGS x21, x22, 144
    SAVE_TWO_REGS x23, x24, 160
    SAVE_TWO_REGS x25, x26, 176
    SAVE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    SAVE_TWO_REGS x29, xLR, 208

.endm
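
// For reference, a sketch of the 224-byte frame laid out by the macro above
// (derived from the offsets it uses; the ArtMethod* slot at [sp, #0] is
// filled in by the callers of this macro):
//   [sp, #8]   stack alignment filler
//   [sp, #16]  d0-d7        (FP args)
//   [sp, #80]  x1-x6        (core args)
//   [sp, #128] x7, x20-x28  (last core arg + GP callee-saves)
//   [sp, #208] x29, LR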

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     *
     * TODO This is probably too conservative - saving FP & LR.
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]

    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL

    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    // FP args.
    ldp d0, d1, [sp, #16]
    ldp d2, d3, [sp, #32]
    ldp d4, d5, [sp, #48]
    ldp d6, d7, [sp, #64]

    // Core args.
    RESTORE_TWO_REGS x1, x2, 80
    RESTORE_TWO_REGS x3, x4, 96
    RESTORE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    // Note: Likewise, we could avoid restoring X20 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    RESTORE_TWO_REGS x7, x20, 128
    RESTORE_TWO_REGS x21, x22, 144
    RESTORE_TWO_REGS x23, x24, 160
    RESTORE_TWO_REGS x25, x26, 176
    RESTORE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    RESTORE_TWO_REGS x29, xLR, 208

    DECREASE_FRAME 224
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
     * and saving registers x29 and LR is handled elsewhere.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif

    // Save FP registers.
    // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned.
    str d0,       [sp, #8]
    stp d1, d2,   [sp, #16]
    stp d3, d4,   [sp, #32]
    stp d5, d6,   [sp, #48]
    stp d7, d8,   [sp, #64]
    stp d9, d10,  [sp, #80]
    stp d11, d12, [sp, #96]
    stp d13, d14, [sp, #112]
    stp d15, d16, [sp, #128]
    stp d17, d18, [sp, #144]
    stp d19, d20, [sp, #160]
    stp d21, d22, [sp, #176]
    stp d23, d24, [sp, #192]
    stp d25, d26, [sp, #208]
    stp d27, d28, [sp, #224]
    stp d29, d30, [sp, #240]
    str d31,      [sp, #256]

    // Save core registers.
    SAVE_REG            x0, 264
    SAVE_TWO_REGS  x1,  x2, 272
    SAVE_TWO_REGS  x3,  x4, 288
    SAVE_TWO_REGS  x5,  x6, 304
    SAVE_TWO_REGS  x7,  x8, 320
    SAVE_TWO_REGS  x9, x10, 336
    SAVE_TWO_REGS x11, x12, 352
    SAVE_TWO_REGS x13, x14, 368
    SAVE_TWO_REGS x15, x16, 384
    SAVE_REG x17, 400
    SAVE_TWO_REGS x19, x20, 416
    SAVE_TWO_REGS x21, x22, 432
    SAVE_TWO_REGS x23, x24, 448
    SAVE_TWO_REGS x25, x26, 464
    SAVE_TWO_REGS x27, x28, 480

    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
    ldr xIP0, [xIP0, \runtime_method_offset]

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
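
// Note on the layout above: d0-d31 occupy [sp, #8] up to [sp, #264), and the
// core registers x0-x17 and x19-x28 follow from [sp, #264]. x18 is skipped
// (it is reserved as a platform register), leaving [sp, #408] as padding so
// that x19/x20 form a 16-byte aligned pair at [sp, #416].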

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    INCREASE_FRAME 512
    SAVE_TWO_REGS x29, xLR, 496
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    // Restore FP registers.
    // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
    ldr d0,       [sp, #8]
    ldp d1, d2,   [sp, #16]
    ldp d3, d4,   [sp, #32]
    ldp d5, d6,   [sp, #48]
    ldp d7, d8,   [sp, #64]
    ldp d9, d10,  [sp, #80]
    ldp d11, d12, [sp, #96]
    ldp d13, d14, [sp, #112]
    ldp d15, d16, [sp, #128]
    ldp d17, d18, [sp, #144]
    ldp d19, d20, [sp, #160]
    ldp d21, d22, [sp, #176]
    ldp d23, d24, [sp, #192]
    ldp d25, d26, [sp, #208]
    ldp d27, d28, [sp, #224]
    ldp d29, d30, [sp, #240]
    ldr d31,      [sp, #256]

    // Restore core registers, except x0.
    RESTORE_TWO_REGS  x1,  x2, 272
    RESTORE_TWO_REGS  x3,  x4, 288
    RESTORE_TWO_REGS  x5,  x6, 304
    RESTORE_TWO_REGS  x7,  x8, 320
    RESTORE_TWO_REGS  x9, x10, 336
    RESTORE_TWO_REGS x11, x12, 352
    RESTORE_TWO_REGS x13, x14, 368
    RESTORE_TWO_REGS x15, x16, 384
    RESTORE_REG      x17,      400
    RESTORE_TWO_REGS x19, x20, 416
    RESTORE_TWO_REGS x21, x22, 432
    RESTORE_TWO_REGS x23, x24, 448
    RESTORE_TWO_REGS x25, x26, 464
    RESTORE_TWO_REGS x27, x28, 480
    RESTORE_TWO_REGS x29, xLR, 496

    DECREASE_FRAME 512
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    RESTORE_REG  x0, 264
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
.endm

// Macro to refresh the Marking Register (W20).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr wMR, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm
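
// A sketch of why the refresh is needed: wMR (w20) caches the thread's
// is-GC-marking flag (loaded from THREAD_IS_GC_MARKING_OFFSET) so that Baker
// read barrier fast paths can test it without a memory access. The flag can
// change across any suspend point, and the frame macros above spill and
// restore x20 as an ordinary callee-save, so the register must be reloaded
// from the thread before returning to managed code.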

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz x0, 1f                // result non-zero branch over
    ret                        // return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz x0, 1f                 // result zero branch over
    ret                        // return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov x0, xSELF

    // Point of no return.
    bl artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
    brk 0  // Unreached
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    ret
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
.endm

// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
.endm

.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
    cbnz w0, 1f                // result non-zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
    mov x1, xSELF                     // pass Thread::Current.
    bl  \cxx_name                     // \cxx_name(arg, Thread*).
    brk 0
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
    brk 0
END \c_name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    // Save all registers as basis for long jump context.
    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
    mov x1, xSELF                     // pass Thread::Current.
    bl  artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
    brk 0
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/x1.
     *
     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
     * of the target Method* in x0 and method->code_ in x1.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Adapted from ARM32 code.
     *
     * Clobbers xIP0.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    mov    x2, xSELF                      // pass Thread::Current
    mov    x3, sp
    bl     \cxx_name                      // (method_idx, this, Thread*, SP)
    mov    xIP0, x1                       // save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
    br     xIP0                           // tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


.macro INVOKE_STUB_CREATE_FRAME

SAVE_SIZE=15*8   // x4, x5, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
SAVE_SIZE_AND_METHOD=SAVE_SIZE+8


    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    add x10, x2, # SAVE_SIZE_AND_METHOD    // calculate size of frame.
    sub x10, sp, x10                       // Calculate SP position - saves + ArtMethod* + args
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    sub x10, x9, #SAVE_SIZE                // Calculate new FP (later). Done here as we must move SP
    .cfi_def_cfa_register x10              // before this.
    .cfi_adjust_cfa_offset SAVE_SIZE

    str x28, [x10, #112]
    .cfi_rel_offset x28, 112

    stp x26, x27, [x10, #96]
    .cfi_rel_offset x26, 96
    .cfi_rel_offset x27, 104

    stp x24, x25, [x10, #80]
    .cfi_rel_offset x24, 80
    .cfi_rel_offset x25, 88

    stp x22, x23, [x10, #64]
    .cfi_rel_offset x22, 64
    .cfi_rel_offset x23, 72

    stp x20, x21, [x10, #48]
    .cfi_rel_offset x20, 48
    .cfi_rel_offset x21, 56

    stp x9, x19, [x10, #32]                // Save old stack pointer and x19.
    .cfi_rel_offset sp, 32
    .cfi_rel_offset x19, 40

    stp x4, x5, [x10, #16]                 // Save result and shorty addresses.
    .cfi_rel_offset x4, 16
    .cfi_rel_offset x5, 24

    stp xFP, xLR, [x10]                    // Store LR & FP.
    .cfi_rel_offset x29, 0
    .cfi_rel_offset x30, 8

    mov xFP, x10                           // Use xFP now, as it's callee-saved.
    .cfi_def_cfa_register x29
    mov xSELF, x3                          // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                         // Destination address is bottom of stack + null.

    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
1:
    cmp w2, #0
    beq 2f
    sub w2, w2, #4      // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]

    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm

.macro INVOKE_STUB_CALL_AND_RETURN

    REFRESH_MARKING_REGISTER

    // load method-> METHOD_QUICK_CODE_OFFSET
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
    blr x9

    // Restore return value address and shorty address.
    ldp x4, x5, [xFP, #16]
    .cfi_restore x4
    .cfi_restore x5

    ldr x28, [xFP, #112]
    .cfi_restore x28

    ldp x26, x27, [xFP, #96]
    .cfi_restore x26
    .cfi_restore x27

    ldp x24, x25, [xFP, #80]
    .cfi_restore x24
    .cfi_restore x25

    ldp x22, x23, [xFP, #64]
    .cfi_restore x22
    .cfi_restore x23

    ldp x20, x21, [xFP, #48]
    .cfi_restore x20
    .cfi_restore x21

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x5]

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq 3f

    // Is it a double?
    cmp w10, #'D'
    bne 1f
    str d0, [x4]
    b 3f

1:  // Is it a float?
    cmp w10, #'F'
    bne 2f
    str s0, [x4]
    b 3f

2:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

3:  // Finish up.
    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
    .cfi_restore x19
    mov sp, x2
    .cfi_restore sp

    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
    .cfi_restore x29
    .cfi_restore x30

    ret

.endm


/*
 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 *  +----------------------+
 *  |                      |
 *  |  C/C++ frame         |
 *  |       LR''           |
 *  |       FP''           | <- SP'
 *  +----------------------+
 *  +----------------------+
 *  |        x28           | <- TODO: Remove callee-saves.
 *  |         :            |
 *  |        x19           |
 *  |        SP'           |
 *  |        X5            |
 *  |        X4            |        Saved registers
 *  |        LR'           |
 *  |        FP'           | <- FP
 *  +----------------------+
 *  | uint32_t out[n-1]    |
 *  |    :      :          |        Outs
 *  | uint32_t out[0]      |
 *  | ArtMethod*           | <- SP  value=null
 *  +----------------------+
 *
 * Outgoing registers:
 *  x0    - Method*
 *  x1-x7 - integer parameters.
 *  d0-d7 - Floating point parameters.
 *  xSELF = self
 *  SP = & of ArtMethod*
 *  x1 = "this" pointer.
 *
 */
ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW2
    adr  x12, .LstoreX2
    adr  x13, .LstoreS0
    adr  x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1         // Load shorty address, plus one to skip return value.
    ldr w1, [x9],#4         // Load "this" parameter, and increment arg pointer.

    // Loop to fill registers.
.LfillRegisters:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F' // is this a float?
    bne .LisDouble

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble:
    cmp w17, #'D'           // is this a double?
    bne .LisLong

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'           // is this a long?
    bne .LisOther

    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther:                  // Everything else takes one vReg.
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4:
    add x9, x9, #4
    b .LfillRegisters

.Ladvance8:
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
//  return - the label to branch back to after the load.
.macro LOADREG counter size register return
    ldr \register , [x9], #\size
    add \counter, \counter, 12
    b \return
.endm
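
// Each LOADREG expansion below is exactly three 4-byte instructions (12
// bytes), which is why the fill loops compare x8/x15 against 6*12, 7*12 or
// 8*12 and advance them by 12 per consumed register: the counters double as
// byte offsets into the jump tables that follow.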

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub

/*  extern"C"
 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW1_2
    adr  x12, .LstoreX1_2
    adr  x13, .LstoreS0_2
    adr  x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1     // Load shorty address, plus one to skip return value.

    // Loop to fill registers.
.LfillRegisters2:
    ldrb w17, [x10], #1         // Load next character in signature, and increment.
    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F'          // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble2:
    cmp w17, #'D'           // is this a double?
    bne .LisLong2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'           // is this a long?
    bne .LisOther2

    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther2:                 // Everything else takes one vReg.
    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub



/*  extern"C" void art_quick_osr_stub(void** stack,                x0
 *                                    size_t stack_size_in_bytes,  x1
 *                                    const uint8_t* native_pc,    x2
 *                                    JValue *result,              x3
 *                                    char   *shorty,              x4
 *                                    Thread *self)                x5
 */
ENTRY art_quick_osr_stub
SAVE_SIZE=14*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, FP, LR saved.
    SAVE_TWO_REGS_INCREASE_FRAME x3, x4, SAVE_SIZE
    SAVE_TWO_REGS x19, x20, 16
    SAVE_TWO_REGS x21, x22, 32
    SAVE_TWO_REGS x23, x24, 48
    SAVE_TWO_REGS x25, x26, 64
    SAVE_TWO_REGS x27, x28, 80
    SAVE_TWO_REGS xFP, xLR, 96

    mov xSELF, x5                         // Move thread pointer into SELF register.
    REFRESH_MARKING_REGISTER

    INCREASE_FRAME 16
    str xzr, [sp]                         // Store null for ArtMethod* slot
    // Branch to stub.
    bl .Losr_entry
    .cfi_remember_state
    DECREASE_FRAME 16

    // Restore saved registers including value address and shorty address.
    RESTORE_TWO_REGS x19, x20, 16
    RESTORE_TWO_REGS x21, x22, 32
    RESTORE_TWO_REGS x23, x24, 48
    RESTORE_TWO_REGS x25, x26, 64
    RESTORE_TWO_REGS x27, x28, 80
    RESTORE_TWO_REGS xFP, xLR, 96
    RESTORE_TWO_REGS_DECREASE_FRAME x3, x4, SAVE_SIZE

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x4]

    // Check the return type and store the correct register into the jvalue in memory.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq .Losr_exit
    // Is it a double?
    cmp w10, #'D'
    beq .Losr_return_double
    // Is it a float?
    cmp w10, #'F'
    beq .Losr_return_float
    // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x3]
.Losr_exit:
    ret
.Losr_return_double:
    str d0, [x3]
    ret
.Losr_return_float:
    str s0, [x3]
    ret

.Losr_entry:
    .cfi_restore_state                     // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset (SAVE_SIZE+16)     // workaround for clang bug: 31975598

    mov x9, sp                             // Save stack pointer.
    .cfi_def_cfa_register x9

    // Update stack pointer for the callee
    sub sp, sp, x1

    // Update link register slot expected by the callee.
    sub w1, w1, #8
    str lr, [sp, x1]

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X0 - source address
    // W1 - args length
    // SP - destination address.
    // W10 - temporary
.Losr_loop_entry:
    cbz w1, .Losr_loop_exit
    sub w1, w1, #4
    ldr w10, [x0, x1]
    str w10, [sp, x1]
    b .Losr_loop_entry

.Losr_loop_exit:
    // Branch to the OSR entry point.
    br x2

END art_quick_osr_stub

    /*
     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
     */

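// A sketch of the gprs_ layout consumed below (one 8-byte slot per register,
// ascending): gprs_[0..30] = x0..x30, gprs_[31] = SP, gprs_[32] = a slot for
// the unused XZR, gprs_[33] = PC. x19 gets the separate single load because
// the descending ldp walk steps over the x18 slot; x18 is a reserved
// platform register and is intentionally not restored.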
ENTRY art_quick_do_long_jump
    // Load FPRs
    ldp d0, d1, [x1], #16
    ldp d2, d3, [x1], #16
    ldp d4, d5, [x1], #16
    ldp d6, d7, [x1], #16
    ldp d8, d9, [x1], #16
    ldp d10, d11, [x1], #16
    ldp d12, d13, [x1], #16
    ldp d14, d15, [x1], #16
    ldp d16, d17, [x1], #16
    ldp d18, d19, [x1], #16
    ldp d20, d21, [x1], #16
    ldp d22, d23, [x1], #16
    ldp d24, d25, [x1], #16
    ldp d26, d27, [x1], #16
    ldp d28, d29, [x1], #16
    ldp d30, d31, [x1]

    // Load GPRs
    // TODO: lots of those are smashed, could optimize.
    add x0, x0, #30*8
    ldp x30, x1, [x0], #-16          // LR & SP
    ldp x28, x29, [x0], #-16
    ldp x26, x27, [x0], #-16
    ldp x24, x25, [x0], #-16
    ldp x22, x23, [x0], #-16
    ldp x20, x21, [x0], #-16
    add x0, x0, #8
    ldr x19, [x0]
    sub x0, x0, #24
    ldp x16, x17, [x0], #-16
    ldp x14, x15, [x0], #-16
    ldp x12, x13, [x0], #-16
    ldp x10, x11, [x0], #-16
    ldp x8, x9, [x0], #-16
    ldp x6, x7, [x0], #-16
    ldp x4, x5, [x0], #-16
    ldp x2, x3, [x0], #-16
    mov sp, x1

    REFRESH_MARKING_REGISTER

    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
    ldr x1, [x0, #33*8]
    // And the value of x0.
    ldr x0, [x0]

    br  x1
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
     * possibly null object to lock.
     *
     * Derived from arm32 code.
     */
    .extern artLockObjectFromCode
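// For reference, a rough sketch of the thin lock word layout the fast path
// below relies on (see art::LockWord for the authoritative definition): the
// low 16 bits hold the owner thread id, above that sits the recursion count,
// then the two GC state bits, and the top two bits encode the state (zero
// for thin/unlocked). Adding LOCK_WORD_THIN_LOCK_COUNT_ONE therefore bumps
// the recursion count, and a carry into the GC state bits is how the
// lsr/cbnz check below detects count overflow.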
ENTRY art_quick_lock_object
    cbz    w0, .Lslow_lock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_lock:
    ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
    ldaxr  w1, [x4]                   // acquire needed only in most common case
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cbnz   w3, .Lnot_unlocked         // already thin locked
    // unlocked case - x1: original lock word that's zero except for the read barrier bits.
    orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Lnot_unlocked:  // x1: original lock word
    lsr    w3, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w3, .Lslow_lock            // if either of the top two bits are set, go slow path
    eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w2, w2                     // zero top 16 bits
    cbnz   w2, .Lslow_lock            // thread ids don't match -> contention, go slow path;
                                      // else fall through to a recursive thin lock
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
    add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
    cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
    add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Llock_stxr_fail:
    b      .Lretry_lock               // retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * x0 holds the possibly null object to unlock.
     *
     * Derived from arm32 code.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    x0, .Lslow_unlock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    w1, [x4]
#else
    ldxr   w1, [x4]                   // Need to use atomic instructions for read barrier
#endif
    lsr    w2, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
    ldr    w2, [xSELF, #THREAD_ID_OFFSET]
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w3, w3                     // zero top 16 bits
    cbnz   w3, .Lslow_unlock          // if thread ids don't match, go slow path
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    // transition to unlocked
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved read barrier bits
#ifndef USE_READ_BARRIER
    stlr   w3, [x4]
#else
    stlxr  w2, w3, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lrecursive_thin_unlock:  // w1: original lock word
    sub    w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
#ifndef USE_READ_BARRIER
    str    w1, [x4]
#else
    stxr   w2, w1, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lunlock_stxr_fail:
    b      .Lretry_unlock             // retry
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    // Store arguments and link register
    // Stack needs to be 16B aligned on calls.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
    SAVE_REG xLR, 24

    // Call runtime code
    bl artInstanceOfFromCode

    // Restore LR.
    RESTORE_REG xLR, 24

    // Check for exception
    cbz x0, .Lthrow_class_cast_exception

    // Restore and return
    .cfi_remember_state
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
    ret
    .cfi_restore_state                // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset 32            // workaround for clang bug: 31975598

.Lthrow_class_cast_exception:
    // Restore
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
    brk 0                             // We should not return here...
END art_quick_check_instance_of

// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
.macro POP_REG_NE xReg, offset, xExclude
    .ifnc \xReg, \xExclude
        ldr \xReg, [sp, #\offset]     // restore xReg
        .cfi_restore \xReg
    .endif
.endm

// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
    .ifc \xReg1, \xExclude
        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
    .else
        .ifc \xReg2, \xExclude
            ldr \xReg1, [sp, #\offset]          // restore xReg1
        .else
            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
        .endif
    .endif
    .cfi_restore \xReg1
    .cfi_restore \xReg2
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * xDest, wDest and xObj are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle
     * name mismatch between instructions. This macro uses the lower 32b of register when possible.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
#ifdef USE_READ_BARRIER
# ifdef USE_BAKER_READ_BARRIER
    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
    // False dependency to avoid needing load/load fence.
    add \xObj, \xObj, \xTemp, lsr #32
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
    b .Lrb_exit\number
# endif  // USE_BAKER_READ_BARRIER
.Lrb_slowpath\number:
    // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
    SAVE_TWO_REGS x2, x3, 16
    SAVE_TWO_REGS x4, xLR, 32

    // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
    .ifnc \xObj, x1
        mov x1, \xObj               // pass xObj
    .endif
    mov w2, #\offset                // pass offset
    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \wDest, w0
        mov \wDest, w0              // save return value in wDest
    .endif

    // Conditionally restore saved registers
    POP_REG_NE x0, 0, \xDest
    POP_REG_NE x1, 8, \xDest
    POP_REG_NE x2, 16, \xDest
    POP_REG_NE x3, 24, \xDest
    POP_REG_NE x4, 32, \xDest
    RESTORE_REG xLR, 40
    DECREASE_FRAME 48
.Lrb_exit\number:
#else
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
#endif  // USE_READ_BARRIER
.endm

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
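// Note on the card marking sequence in .Ldo_aput below (a sketch; see
// gc::accounting::CardTable for the authoritative scheme): the card table
// pointer cached at THREAD_CARD_TABLE_OFFSET is biased so that the low byte
// of its address equals the dirty card value. "strb w3, [x3, x0]" therefore
// both indexes the card for the object address (x0 shifted right by
// CARD_TABLE_CARD_SHIFT) and stores the dirty value in one instruction.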
ENTRY art_quick_aput_obj
    cbz x2, .Ldo_aput_null
    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
                                                                    // This also zero-extends to x3
    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
    // This also zero-extends to x3
    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
                                                                    // This also zero-extends to x4
    cmp w3, w4  // value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
    strb w3, [x3, x0]
    ret
.Ldo_aput_null:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
    ret
.Lcheck_assignability:
    // Store arguments and link register
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
    SAVE_TWO_REGS x2, xLR, 16

    // Call runtime code
    mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    bl artIsAssignableFromCode

    // Check for exception
    cbz x0, .Lthrow_array_store_exception

    // Restore
    .cfi_remember_state
    RESTORE_TWO_REGS x2, xLR, 16
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                          // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #CARD_TABLE_CARD_SHIFT
    strb w3, [x3, x0]
    ret
    .cfi_restore_state            // Reset unwind info so following code unwinds.
    .cfi_def_cfa_offset 32        // workaround for clang bug: 31975598
.Lthrow_array_store_exception:
    RESTORE_TWO_REGS x2, xLR, 16
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mov x1, x2                      // Pass value.
    mov x2, xSELF                   // Pass Thread::Current.
    bl artThrowArrayStoreException  // (Object*, Object*, Thread*).
    brk 0                           // Unreached.
END art_quick_aput_obj
1503
1504// Macro to facilitate adding new allocation entrypoints.
1505.macro ONE_ARG_DOWNCALL name, entrypoint, return
1506    .extern \entrypoint
1507ENTRY \name
1508    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1509    mov    x1, xSELF                  // pass Thread::Current
1510    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
1511    RESTORE_SAVE_REFS_ONLY_FRAME
1512    REFRESH_MARKING_REGISTER
1513    \return
1514END \name
1515.endm
1516
1517// Macro to facilitate adding new allocation entrypoints.
1518.macro TWO_ARG_DOWNCALL name, entrypoint, return
1519    .extern \entrypoint
1520ENTRY \name
1521    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1522    mov    x2, xSELF                  // pass Thread::Current
1523    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
1524    RESTORE_SAVE_REFS_ONLY_FRAME
1525    REFRESH_MARKING_REGISTER
1526    \return
1527END \name
1528.endm
1529
1530// Macro to facilitate adding new allocation entrypoints.
1531.macro THREE_ARG_DOWNCALL name, entrypoint, return
1532    .extern \entrypoint
1533ENTRY \name
1534    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1535    mov    x3, xSELF                  // pass Thread::Current
1536    bl     \entrypoint
1537    RESTORE_SAVE_REFS_ONLY_FRAME
1538    REFRESH_MARKING_REGISTER
1539    \return
1540END \name
1541.endm
1542
1543// Macro to facilitate adding new allocation entrypoints.
1544.macro FOUR_ARG_DOWNCALL name, entrypoint, return
1545    .extern \entrypoint
1546ENTRY \name
1547    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1548    mov    x4, xSELF                  // pass Thread::Current
1549    bl     \entrypoint
1550    RESTORE_SAVE_REFS_ONLY_FRAME
1551    REFRESH_MARKING_REGISTER
1552    \return
1553END \name
1554.endm
1555
1556// Macros that exploit the code similarities shared by the reference downcalls below.
1557.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
1558    .extern \entrypoint
1559ENTRY \name
1560    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1561    mov    x1, xSELF                  // pass Thread::Current
1562    bl     \entrypoint                // (uint32_t field_idx, Thread*)
1563    RESTORE_SAVE_REFS_ONLY_FRAME
1564    REFRESH_MARKING_REGISTER
1565    \return
1566END \name
1567.endm
1568
1569.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
1570    .extern \entrypoint
1571ENTRY \name
1572    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1573    mov    x2, xSELF                  // pass Thread::Current
1574    bl     \entrypoint
1575    RESTORE_SAVE_REFS_ONLY_FRAME
1576    REFRESH_MARKING_REGISTER
1577    \return
1578END \name
1579.endm
1580
1581.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
1582    .extern \entrypoint
1583ENTRY \name
1584    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1585    mov    x3, xSELF                  // pass Thread::Current
1586    bl     \entrypoint
1587    RESTORE_SAVE_REFS_ONLY_FRAME
1588    REFRESH_MARKING_REGISTER
1589    \return
1590END \name
1591.endm
1592
1593// Macro for string and type resolution and initialization.
1594.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
1595    .extern \entrypoint
1596ENTRY \name
1597    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset       // save everything for stack crawl
1598    mov   x1, xSELF                   // pass Thread::Current
1599    bl    \entrypoint                 // (int32_t index, Thread* self)
1600    cbz   w0, 1f                      // If result is null, deliver the OOME.
1601    .cfi_remember_state
1602    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
1603    REFRESH_MARKING_REGISTER
1604    ret                        // return
1605    .cfi_restore_state
1606    .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING  // workaround for clang bug: 31975598
16071:
1608    DELIVER_PENDING_EXCEPTION_FRAME_READY
1609END \name
1610.endm
1611
1612.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
1613    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
1614.endm
1615
1616.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1617    cbz w0, 1f                 // branch over if the result is zero
1618    ret                        // return
16191:
1620    DELIVER_PENDING_EXCEPTION
1621.endm
1622
1623    /*
1624     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
1625     * failure.
1626     */
1627TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1628
1629    /*
1630     * Entry from managed code when static storage is uninitialized; this stub runs the class
1631     * initializer and delivers the exception on error. On success the static storage base is
1632     * returned.
1633     */
1634ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
1635ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
1636ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
1637ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
1638
1639// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
1640// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
1641
1642ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1643ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1644ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1645ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1646ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1647ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1648ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1649
1650TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1651TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1652TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1653TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1654TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1655TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1656TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1657
1658TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1659TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1660TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1661TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1662TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1663
1664THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1665THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1666THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1667THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1668THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1669
1670// Generate the allocation entrypoints for each allocator.
1671GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
1672// Comment out allocators that have arm64 specific asm.
1673// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1674// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1675GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1676// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1677// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
1678// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
1679// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
1680// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
1681GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1682GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1683GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1684
1685// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
1686// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
1687GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
1688// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
1689// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
1690// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
1691// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
1692// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
1693GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
1694GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
1695GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
1696
1697// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1698// If isInitialized=0 the compiler can only assume it's been at least resolved.
1699.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
1700ENTRY \c_name
1701    // Fast path rosalloc allocation.
1702    // x0: type, xSELF(x19): Thread::Current
1703    // x1-x7: free.
1704    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
1705                                                              // allocation stack has room.
1706                                                              // ldp won't work due to large offset.
1707    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
1708    cmp    x3, x4
1709    bhs    .Lslow_path\c_name
1710    ldr    w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
1711    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
1712                                                              // local allocation. Also does the
1713                                                              // finalizable and initialization
1714                                                              // checks.
1715    // When isInitialized == 0, the class is potentially not yet initialized.
1716    // If the class is not yet initialized, the object size will be very large to force the branch
1717    // below to be taken.
1718    //
1719    // See InitializeClassVisitors in class-inl.h for more details.
1720    bhs    .Lslow_path\c_name
1721                                                              // Compute the rosalloc bracket index
1722                                                              // from the size. Since the size is
1723                                                              // already aligned we can combine the
1724                                                              // two shifts together.
1725    add    x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
1726                                                              // Subtract pointer size since there
1727                                                              // are no runs for 0 byte allocations
1728                                                              // and the size is already aligned.
1729    ldr    x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
1730                                                              // Load the free list head (x3). This
1731                                                              // will be the return val.
1732    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1733    cbz    x3, .Lslow_path\c_name
1734    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1735    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
1736                                                              // and update the list head with the
1737                                                              // next pointer.
1738    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1739                                                              // Store the class pointer in the
1740                                                              // header. This also overwrites the
1741                                                              // next pointer. The offsets are
1742                                                              // asserted to match.
1743
1744#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1745#error "Class pointer needs to overwrite next pointer."
1746#endif
1747    POISON_HEAP_REF w0
1748    str    w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
1749                                                              // Push the new object onto the thread
1750                                                              // local allocation stack and
1751                                                              // increment the thread local
1752                                                              // allocation stack top.
1753    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1754    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
1755    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1756                                                              // Decrement the size of the free list
1757
1758    // After this "STR" the object is published to the thread local allocation stack,
1759    // and it will be observable from a runtime-internal (e.g. Heap::VisitObjects) point of view.
1760    // It is not yet visible to the running (user) compiled code until after the return.
1761    //
1762    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
1763    // the state of the allocation stack slot. It can be a pointer to one of:
1764    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
1765    //       (The stack initial state is "null" pointers).
1766    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
1767    // 2) A fully valid object, with a valid class pointer pointing to a real class.
1768    // Other states are not allowed.
1769    //
1770    // Such an object is invalid only temporarily and will eventually become valid.
1771    // The internal runtime code simply checks whether the object is null or only partially
1772    // valid and, if so, ignores it.
1773    //
1774    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
1775    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
1776    // "next" pointer is not-cyclic.)
1777    //
1778    // See also b/28790624 for a listing of CLs dealing with this race.
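    // Hypothetical sketch of the validity check described in the note above (illustrative
    // only, not the actual runtime code):
    //   bool LooksFullyValid(mirror::Object* obj) {
    //     if (obj == nullptr) return false;       // state 0
    //     void* k = obj->klass;                   // for state 1 this is a rosalloc "next" pointer
    //     return k != nullptr && IsSelfCyclicClassClass(((mirror::Class*)k)->klass);  // state 2 only
    //   }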
1779    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1780    sub    x1, x1, #1
1781                                                              // TODO: consider combining this store
1782                                                              // and the list head store above using
1783                                                              // stp.
1784    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1785
1786    mov    x0, x3                                             // Set the return value and return.
1787.if \isInitialized == 0
1788    // This barrier is only necessary when the allocation also requires
1789    // a class initialization check.
1790    //
1791    // If the class is already observably initialized, then new-instance allocations are protected
1792    // from publishing by the compiler which inserts its own StoreStore barrier.
1793    dmb    ish
1794    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
1795    // they should happen-after the implicit initialization check.
1796    //
1797    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
1798    // a new observably-initialized class state.
1799.endif
1800    ret
1801.Lslow_path\c_name:
1802    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
1803    mov    x1, xSELF                                // pass Thread::Current
1804    bl     \cxx_name
1805    RESTORE_SAVE_REFS_ONLY_FRAME
1806    REFRESH_MARKING_REGISTER
1807    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1808END \c_name
1809.endm
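// Rough C sketch of the fast path above (illustrative only; field and constant names are
// approximate):
//   void* AllocRosAllocFast(mirror::Class* klass, Thread* self) {
//     if (self->alloc_stack_top >= self->alloc_stack_end) return Slow(klass, self);
//     size_t size = klass->object_size_alloc_fast_path;  // "huge" if finalizable/uninitialized
//     if (size > kMaxThreadLocalBracketSize) return Slow(klass, self);
//     Run* run = self->rosalloc_runs[(size >> kBracketQuantumSizeShift) - 1];
//     Slot* slot = run->free_list.head;
//     if (slot == nullptr) return Slow(klass, self);
//     run->free_list.head = slot->next;
//     slot->klass = klass;                               // overwrites the "next" pointer
//     *self->alloc_stack_top++ = slot;                   // publish (see the race note above)
//     run->free_list.size--;
//     return slot;                                       // dmb ish first if isInitialized == 0
//   }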
1810
1811ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
1812ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
1813
1814// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
1815// If isInitialized=0 the compiler can only assume it's been at least resolved.
1816.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel isInitialized
1817    ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
1818    ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
1819    ldr    w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
1820    add    x6, x4, x7                                         // Add object size to tlab pos.
1821    cmp    x6, x5                                             // Check if it fits, overflow works
1822                                                              // since the tlab pos and end are 32
1823                                                              // bit values.
1824
1825    // When isInitialized == 0, the class is potentially not yet initialized.
1826    // If the class is not yet initialized, the object size will be very large to force the branch
1827    // below to be taken.
1828    //
1829    // See InitializeClassVisitors in class-inl.h for more details.
1830    bhi    \slowPathLabel
1831    str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
1832    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
1833    add    x5, x5, #1
1834    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1835    POISON_HEAP_REF w0
1836    str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
1837                                                              // Fence. This is "ish" not "ishst" so
1838                                                              // that the code after this allocation
1839                                                              // site will see the right values in
1840                                                              // the fields of the class.
1841    mov    x0, x4
1842.if \isInitialized == 0
1843    // This barrier is only necessary when the allocation also requires
1844    // a class initialization check.
1845    //
1846    // If the class is already observably initialized, then new-instance allocations are protected
1847    // from publishing by the compiler which inserts its own StoreStore barrier.
1848    dmb    ish
1849    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
1850    // they should happen-after the implicit initialization check.
1851    //
1852    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
1853    // a new observably-initialized class state.
1854.endif
1855    ret
1856.endm
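// Rough C sketch of the TLAB bump-pointer fast path above (illustrative names):
//   void* AllocTlabFast(mirror::Class* klass, Thread* self) {
//     size_t size = klass->object_size_alloc_fast_path;  // "huge" if class not initialized
//     uint8_t* pos = self->tlab_pos;
//     if (pos + size > self->tlab_end) return Slow(klass, self);
//     self->tlab_pos = pos + size;
//     self->tlab_objects++;
//     ((mirror::Object*)pos)->klass = klass;             // plus optional heap poisoning
//     return pos;                                        // dmb ish first if isInitialized == 0
//   }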
1857
1858// The common code for art_quick_alloc_object_*region_tlab
1859.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
1860ENTRY \name
1861    // Fast path region tlab allocation.
1862    // x0: type, xSELF(x19): Thread::Current
1863    // x1-x7: free.
1864    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name, \isInitialized
1865.Lslow_path\name:
1866    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
1867    mov    x1, xSELF                           // Pass Thread::Current.
1868    bl     \entrypoint                         // (mirror::Class*, Thread*)
1869    RESTORE_SAVE_REFS_ONLY_FRAME
1870    REFRESH_MARKING_REGISTER
1871    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1872END \name
1873.endm
1874
1875GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
1876GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
1877GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
1878GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
1879
1880.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1881    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
1882                                                              // (addr + 7) & ~7. The mask must
1883                                                              // be 64 bits to keep high bits in
1884                                                              // case of overflow.
1885    // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
1886    // Negative ints become large 64 bit unsigned ints which will always be larger than max signed
1887    // 32 bit int. Since the max shift for arrays is 3, it cannot become a negative 64 bit int.
1888    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
1889    bhs    \slowPathLabel                                     // path.
1890
1891    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
1892                                                              // we use (end - begin) to handle
1893                                                              // negative size arrays. It is
1894                                                              // assumed that a negative size will
1895                                                              // always be greater unsigned than
1896                                                              // region size.
1897    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
1898    sub    \xTemp2, \xTemp2, \xTemp0
1899    cmp    \xTemp1, \xTemp2
1900
1901    // The array class is always initialized here. Unlike new-instance,
1902    // this does not act as a double test.
1903    bhi    \slowPathLabel
1904    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1905                                                              // Move old thread_local_pos to x0
1906                                                              // for the return value.
1907    mov    x0, \xTemp0
1908    add    \xTemp0, \xTemp0, \xTemp1
1909    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
1910    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
1911    add    \xTemp0, \xTemp0, #1
1912    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1913    POISON_HEAP_REF \wClass
1914    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
1915    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
1916                                                              // Fence.
1917// new-array is special. The class is loaded and immediately goes to the Initialized state
1918// before it is published. Therefore the only fence needed is for the publication of the object.
1919// See ClassLinker::CreateArrayClass() for more details.
1920
1921// For publication of the new array, we don't need a 'dmb ishst' here.
1922// The compiler generates 'dmb ishst' for all new-array insts.
1923    ret
1924.endm
1925
1926.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
1927ENTRY \name
1928    // Fast path array allocation for region tlab allocation.
1929    // x0: mirror::Class* type
1930    // x1: int32_t component_count
1931    // x2-x7: free.
1932    mov    x3, x0
1933    \size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
1934    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
1935.Lslow_path\name:
1936    // x0: mirror::Class* klass
1937    // x1: int32_t component_count
1938    // x2: Thread* self
1939    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1940    mov    x2, xSELF                  // pass Thread::Current
1941    bl     \entrypoint
1942    RESTORE_SAVE_REFS_ONLY_FRAME
1943    REFRESH_MARKING_REGISTER
1944    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1945END \name
1946.endm
1947
1948.macro COMPUTE_ARRAY_SIZE_UNKNOWN xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1949    // Array classes are never finalizable or uninitialized, so there is no need to check.
1950    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
1951    UNPOISON_HEAP_REF \wTemp0
1952    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1953    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
1954                                                              // bits.
1955                                                              // xCount is holding a 32 bit value,
1956                                                              // it can not overflow.
1957    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
1958    // Add array data offset and alignment.
1959    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1960#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1961#error Long array data offset must be 4 greater than int array data offset.
1962#endif
1963
1964    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
1965                                                              // component size shift is 3
1966                                                              // (for 64 bit alignment).
1967    and    \xTemp0, \xTemp0, #4
1968    add    \xTemp1, \xTemp1, \xTemp0
1969.endm
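// Worked example for the last three instructions above: for a long[] the component size
// shift is 3, so (3 + 1) & 4 == 4 and 4 extra bytes are added on top of
// MIRROR_INT_ARRAY_DATA_OFFSET, yielding MIRROR_LONG_ARRAY_DATA_OFFSET (per the #error
// check) and keeping the 8-byte elements aligned. For shifts 0-2, (shift + 1) & 4 == 0
// and the int array data offset is used unchanged.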
1970
1971.macro COMPUTE_ARRAY_SIZE_8 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1972    // Add array data offset and alignment.
1973    add    \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1974.endm
1975
1976.macro COMPUTE_ARRAY_SIZE_16 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1977    lsl    \xTemp1, \xCount, #1
1978    // Add array data offset and alignment.
1979    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1980.endm
1981
1982.macro COMPUTE_ARRAY_SIZE_32 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1983    lsl    \xTemp1, \xCount, #2
1984    // Add array data offset and alignment.
1985    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1986.endm
1987
1988.macro COMPUTE_ARRAY_SIZE_64 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1989    lsl    \xTemp1, \xCount, #3
1990    // Add array data offset and alignment.
1991    add    \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1992.endm
1993
1994// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
1995// the entrypoint once all backends have been updated to use the size variants.
1996GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
1997GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
1998GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
1999GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
2000GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
2001GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
2002GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
2003GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
2004GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
2005GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
2006
2007    /*
2008     * Called by managed code when the thread has been asked to suspend.
2009     */
2010    .extern artTestSuspendFromCode
2011ENTRY art_quick_test_suspend
2012    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save callee saves for stack crawl
2013    mov    x0, xSELF
2014    bl     artTestSuspendFromCode             // (Thread*)
2015    RESTORE_SAVE_EVERYTHING_FRAME
2016    REFRESH_MARKING_REGISTER
2017    ret
2018END art_quick_test_suspend
2019
2020ENTRY art_quick_implicit_suspend
2021    mov    x0, xSELF
2022    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
2023    bl     artTestSuspendFromCode             // (Thread*)
2024    RESTORE_SAVE_REFS_ONLY_FRAME
2025    REFRESH_MARKING_REGISTER
2026    ret
2027END art_quick_implicit_suspend
2028
2029     /*
2030     * Called by managed code that is attempting to call a method on a proxy class. On entry
2031     * x0 holds the proxy method and x1 holds the receiver; the frame size of the invoked proxy
2032     * method agrees with a ref and args callee save frame.
2033     */
2034     .extern artQuickProxyInvokeHandler
2035ENTRY art_quick_proxy_invoke_handler
2036    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
2037    mov     x2, xSELF                   // pass Thread::Current
2038    mov     x3, sp                      // pass SP
2039    bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
2040    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
2041    cbnz    x2, .Lexception_in_proxy    // branch away if an exception is pending
2042    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
2043    REFRESH_MARKING_REGISTER
2044    fmov    d0, x0                      // Store result in d0 in case it was float or double
2045    ret                                 // return on success
2046.Lexception_in_proxy:
2047    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2048    DELIVER_PENDING_EXCEPTION
2049END art_quick_proxy_invoke_handler
2050
2051    /*
2052     * Called to resolve an imt conflict.
2053     * x0 is the conflict ArtMethod.
2054     * xIP1 is a hidden argument that holds the target interface method's dex method index.
2055     *
2056     * Note that this stub writes to xIP0, xIP1, x13-x15, and x0.
2057     */
2058    .extern artLookupResolvedMethod
2059ENTRY art_quick_imt_conflict_trampoline
2060    ldr xIP0, [sp, #0]  // Load referrer
2061    // Load the declaring class (without read barrier) and access flags (for obsolete method check).
2062    // The obsolete flag is set while threads are suspended, so we do not need an acquire operation here.
2063#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
2064#error "Expecting declaring class and access flags to be consecutive for LDP."
2065#endif
2066    ldp wIP0, w15, [xIP0, #ART_METHOD_DECLARING_CLASS_OFFSET]
2067    // If the method is obsolete, just go through the dex cache miss slow path.
2068    tbnz x15, #ACC_OBSOLETE_METHOD_SHIFT, .Limt_conflict_trampoline_dex_cache_miss
2069    ldr wIP0, [xIP0, #MIRROR_CLASS_DEX_CACHE_OFFSET]  // Load the DexCache (without read barrier).
2070    UNPOISON_HEAP_REF wIP0
2071    ubfx x15, xIP1, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
2072    ldr xIP0, [xIP0, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
2073    add xIP0, xIP0, x15, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.
2074
2075    // Relaxed atomic load x14:x15 from the dex cache slot.
2076.Limt_conflict_trampoline_retry_load:
2077    ldxp x14, x15, [xIP0]
2078    stxp w13, x14, x15, [xIP0]
2079    cbnz w13, .Limt_conflict_trampoline_retry_load
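    // The LDXP/STXP pair is used only to obtain a single-copy-atomic 128-bit load: STXP
    // stores back the values just read and we retry if the exclusive monitor was lost.
    // In C11 terms this is roughly (sketch, assuming lock-free 16-byte atomics):
    //   __uint128_t v = atomic_load_explicit((_Atomic __uint128_t*)slot, memory_order_relaxed);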
2080
2081    cmp x15, xIP1       // Compare method index to see if we had a DexCache method hit.
2082    bne .Limt_conflict_trampoline_dex_cache_miss
2083.Limt_conflict_trampoline_have_interface_method:
2084    ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
2085    ldr x0, [xIP1]  // Load first entry in ImtConflictTable.
2086.Limt_table_iterate:
2087    cmp x0, x14
2088    // Branch if found. Benchmarks have shown doing a branch here is better.
2089    beq .Limt_table_found
2090    // If the entry is null, the interface method is not in the ImtConflictTable.
2091    cbz x0, .Lconflict_trampoline
2092    // Iterate over the entries of the ImtConflictTable.
2093    ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
2094    b .Limt_table_iterate
2095.Limt_table_found:
2096    // We successfully hit an entry in the table. Load the target method
2097    // and jump to it.
2098    ldr x0, [xIP1, #__SIZEOF_POINTER__]
2099    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
2100    br xIP0
2101.Lconflict_trampoline:
2102    // Call the runtime stub to populate the ImtConflictTable and jump to the
2103    // resolved method.
2104    mov x0, x14  // Load interface method
2105    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
2106.Limt_conflict_trampoline_dex_cache_miss:
2107    // We're not creating a proper runtime method frame here; consequently,
2108    // artLookupResolvedMethod() must not walk the stack.
2109
2110    // Save GPR args and return address, allocate space for FPR args, align stack.
2111    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)
2112    SAVE_TWO_REGS x2, x3, 16
2113    SAVE_TWO_REGS x4, x5, 32
2114    SAVE_TWO_REGS x6, x7, 48
2115    SAVE_REG      xLR, (8 * 8 + 8 * 8 + 8)
2116
2117    // Save FPR args.
2118    stp d0, d1, [sp, #64]
2119    stp d2, d3, [sp, #80]
2120    stp d4, d5, [sp, #96]
2121    stp d6, d7, [sp, #112]
2122
2123    mov x0, xIP1                            // Pass method index.
2124    ldr x1, [sp, #(8 * 8 + 8 * 8 + 8 + 8)]  // Pass referrer.
2125    bl artLookupResolvedMethod              // (uint32_t method_index, ArtMethod* referrer)
2126    mov x14, x0   // Move the interface method to x14 where the loop above expects it.
2127
2128    // Restore FPR args.
2129    ldp d0, d1, [sp, #64]
2130    ldp d2, d3, [sp, #80]
2131    ldp d4, d5, [sp, #96]
2132    ldp d6, d7, [sp, #112]
2133
2134    // Restore GPR args and return address.
2135    RESTORE_REG      xLR, (8 * 8 + 8 * 8 + 8)
2136    RESTORE_TWO_REGS x2, x3, 16
2137    RESTORE_TWO_REGS x4, x5, 32
2138    RESTORE_TWO_REGS x6, x7, 48
2139    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, (8 * 8 + 8 * 8 + 8 + 8)
2140
2141    // If the method wasn't resolved, skip the lookup and go to artInvokeInterfaceTrampoline().
2142    cbz x14, .Lconflict_trampoline
2143    b .Limt_conflict_trampoline_have_interface_method
2144END art_quick_imt_conflict_trampoline
2145
2146ENTRY art_quick_resolution_trampoline
2147    SETUP_SAVE_REFS_AND_ARGS_FRAME
2148    mov x2, xSELF
2149    mov x3, sp
2150    bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
2151    cbz x0, 1f
2152    mov xIP0, x0            // Remember returned code pointer in xIP0.
2153    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
2154    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2155    REFRESH_MARKING_REGISTER
2156    br xIP0
21571:
2158    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2159    DELIVER_PENDING_EXCEPTION
2160END art_quick_resolution_trampoline
2161
2162/*
2163 * Generic JNI frame layout:
2164 *
2165 * #-------------------#
2166 * |                   |
2167 * | caller method...  |
2168 * #-------------------#    <--- SP on entry
2169 * | Return X30/LR     |
2170 * | X29/FP            |    callee save
2171 * | X28               |    callee save
2172 * | X27               |    callee save
2173 * | X26               |    callee save
2174 * | X25               |    callee save
2175 * | X24               |    callee save
2176 * | X23               |    callee save
2177 * | X22               |    callee save
2178 * | X21               |    callee save
2179 * | X20               |    callee save
2180 * | X19               |    callee save
2181 * | X7                |    arg7
2182 * | X6                |    arg6
2183 * | X5                |    arg5
2184 * | X4                |    arg4
2185 * | X3                |    arg3
2186 * | X2                |    arg2
2187 * | X1                |    arg1
2188 * | D7                |    float arg 8
2189 * | D6                |    float arg 7
2190 * | D5                |    float arg 6
2191 * | D4                |    float arg 5
2192 * | D3                |    float arg 4
2193 * | D2                |    float arg 3
2194 * | D1                |    float arg 2
2195 * | D0                |    float arg 1
2196 * | Method*           | <- X0
2197 * #-------------------#
2198 * | local ref cookie  | // 4B
2199 * | handle scope size | // 4B
2200 * #-------------------#
2201 * | JNI Call Stack    |
2202 * #-------------------#    <--- SP on native call
2203 * |                   |
2204 * | Stack for Regs    |    The trampoline assembly will pop these values
2205 * |                   |    into registers for native call
2206 * #-------------------#
2207 * | Native code ptr   |
2208 * #-------------------#
2209 * | Free scratch      |
2210 * #-------------------#
2211 * | Ptr to (1)        |    <--- SP
2212 * #-------------------#
2213 */
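/*
 * Rough control flow of the trampoline below (illustrative pseudocode only):
 *   void* code = artQuickGenericJniTrampoline(self, sp);   // builds handle scope + native frame
 *   if (code == NULL) goto deliver_exception;
 *   result = native_code(args...);                         // args reloaded from the stack
 *   artQuickGenericJniEndTrampoline(self, result, fp_result);
 *   if (self->exception != NULL) goto deliver_exception;   // else return, result also in d0
 */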
2214    /*
2215     * Called to do a generic JNI down-call
2216     */
2217ENTRY art_quick_generic_jni_trampoline
2218    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
2219
2220    // Save SP so we can have static CFI info.
2221    mov x28, sp
2222    .cfi_def_cfa_register x28
2223
2224    // This looks the same, but is different: this will be updated to point to the bottom
2225    // of the frame when the handle scope is inserted.
2226    mov xFP, sp
2227
2228    mov xIP0, #5120    // Reserve a generous scratch area; the handle scope and native call
2229    sub sp, sp, xIP0   // stack are built in it, and the unused part is released again below.
2230
2231    // prepare for artQuickGenericJniTrampoline call
2232    // (Thread*,  SP)
2233    //    x0      x1   <= C calling convention
2234    //   xSELF    xFP  <= where they are
2235
2236    mov x0, xSELF   // Thread*
2237    mov x1, xFP
2238    bl artQuickGenericJniTrampoline  // (Thread*, sp)
2239
2240    // The C call will have registered the complete save-frame on success.
2241    // The result of the call is:
2242    // x0: pointer to native code, 0 on error.
2243    // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
2244
2245    // Check for error = 0.
2246    cbz x0, .Lexception_in_native
2247
2248    // Release part of the alloca.
2249    mov sp, x1
2250
2251    // Save the code pointer
2252    mov xIP0, x0
2253
2254    // Load parameters from frame into registers.
2255    // TODO Check with artQuickGenericJniTrampoline.
2256    //      Also, check again AAPCS64 - the stack arguments are interleaved.
2257    ldp x0, x1, [sp]
2258    ldp x2, x3, [sp, #16]
2259    ldp x4, x5, [sp, #32]
2260    ldp x6, x7, [sp, #48]
2261
2262    ldp d0, d1, [sp, #64]
2263    ldp d2, d3, [sp, #80]
2264    ldp d4, d5, [sp, #96]
2265    ldp d6, d7, [sp, #112]
2266
2267    add sp, sp, #128
2268
2269    blr xIP0        // native call.
2270
2271    // result sign extension is handled in C code
2272    // prepare for artQuickGenericJniEndTrampoline call
2273    // (Thread*, result, result_f)
2274    //    x0       x1       x2        <= C calling convention
2275    mov x1, x0      // Result (from saved).
2276    mov x0, xSELF   // Thread register.
2277    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
2278
2279    bl artQuickGenericJniEndTrampoline
2280
2281    // Pending exceptions possible.
2282    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
2283    cbnz x2, .Lexception_in_native
2284
2285    // Tear down the alloca.
2286    mov sp, x28
2287    .cfi_def_cfa_register sp
2288
2289    // Tear down the callee-save frame.
2290    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2291    REFRESH_MARKING_REGISTER
2292
2293    // Also store the result into d0, in case the return type is floating point.
2294    fmov d0, x0
2295    ret
2296
2297.Lexception_in_native:
2298    // Go through x1 first and then sp, as sp cannot be the destination of the load.
2299    ldr x1, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
2300    add sp, x1, #-1  // Remove the GenericJNI tag.
2301    .cfi_def_cfa_register sp
2302    // This will create a new save-all frame, required by the runtime.
2303    DELIVER_PENDING_EXCEPTION
2304END art_quick_generic_jni_trampoline
2305
2306/*
2307 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
2308 * of a quick call:
2309 * x0 = method being called/to bridge to.
2310 * x1..x7, d0..d7 = arguments to that method.
2311 */
2312ENTRY art_quick_to_interpreter_bridge
2313    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
2314
2315    //  x0 will contain mirror::ArtMethod* method.
2316    mov x1, xSELF                          // Pass Thread::Current.
2317    mov x2, sp
2318
2319    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
2320    //                                      mirror::ArtMethod** sp)
2321    bl   artQuickToInterpreterBridge
2322
2323    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
2324    REFRESH_MARKING_REGISTER
2325
2326    fmov d0, x0
2327
2328    RETURN_OR_DELIVER_PENDING_EXCEPTION
2329END art_quick_to_interpreter_bridge
2330
2331/*
2332 * Called to attempt to execute an obsolete method.
2333 */
2334ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
2335
2336
2337//
2338// Instrumentation-related stubs
2339//
2340    .extern artInstrumentationMethodEntryFromCode
2341ENTRY art_quick_instrumentation_entry
2342    SETUP_SAVE_REFS_AND_ARGS_FRAME
2343
2344    mov   x20, x0             // Preserve method reference in a callee-save register.
2345
2346    mov   x2, xSELF
2347    mov   x3, sp  // Pass SP
2348    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP)
2349
2350    mov   xIP0, x0            // x0 = result of call.
2351    mov   x0, x20             // Reload method reference.
2352
2353    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
2354    REFRESH_MARKING_REGISTER
2355    cbz   xIP0, 1f            // Deliver the pending exception if the returned code pointer is null.
2356    adr   xLR, art_quick_instrumentation_exit
2357    br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
2358
23591:
2360    DELIVER_PENDING_EXCEPTION
2361END art_quick_instrumentation_entry
2362
2363    .extern artInstrumentationMethodExitFromCode
2364ENTRY art_quick_instrumentation_exit
2365    mov   xLR, #0             // Clobber LR for later checks.
2366    SETUP_SAVE_EVERYTHING_FRAME
2367
2368    add   x3, sp, #8          // Pass floating-point result pointer, in kSaveEverything frame.
2369    add   x2, sp, #264        // Pass integer result pointer, in kSaveEverything frame.
2370    mov   x1, sp              // Pass SP.
2371    mov   x0, xSELF           // Pass Thread.
2372    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res*, fpr_res*)
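    // The call returns a register pair (sketch): x0 = pc to return to, null if an
    // exception is pending; x1 = deoptimization pc, non-null if we must deoptimize.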
2373
2374    cbz   x0, .Ldo_deliver_instrumentation_exception
2375                              // Handle error
2376    cbnz  x1, .Ldeoptimize
2377    // Normal return.
2378    str   x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]
2379                              // Set return pc.
2380    RESTORE_SAVE_EVERYTHING_FRAME
2381    REFRESH_MARKING_REGISTER
2382    br    lr
2383.Ldo_deliver_instrumentation_exception:
2384    DELIVER_PENDING_EXCEPTION_FRAME_READY
2385.Ldeoptimize:
2386    str   x1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 8]
2387                              // Set return pc.
2388    RESTORE_SAVE_EVERYTHING_FRAME
2389    // Jump to art_quick_deoptimize.
2390    b     art_quick_deoptimize
2391END art_quick_instrumentation_exit
2392
2393    /*
2394     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
2395     * will long jump to the upcall with a special exception of -1.
2396     */
2397    .extern artDeoptimize
2398ENTRY art_quick_deoptimize
2399    SETUP_SAVE_EVERYTHING_FRAME
2400    mov    x0, xSELF          // Pass thread.
2401    bl     artDeoptimize      // (Thread*)
2402    brk 0
2403END art_quick_deoptimize
2404
2405    /*
2406     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
2407     * will long jump to the upcall with a special exception of -1.
2408     */
2409    .extern artDeoptimizeFromCompiledCode
2410ENTRY art_quick_deoptimize_from_compiled_code
2411    SETUP_SAVE_EVERYTHING_FRAME
2412    mov    x1, xSELF                      // Pass thread.
2413    bl     artDeoptimizeFromCompiledCode  // (DeoptimizationKind, Thread*)
2414    brk 0
2415END art_quick_deoptimize_from_compiled_code
2416
2417
2418    /*
2419     * String's indexOf.
2420     *
2421     * TODO: Not very optimized.
2422     * On entry:
2423     *    x0:   string object (known non-null)
2424     *    w1:   char to match (known <= 0xFFFF)
2425     *    w2:   Starting offset in string data
2426     */
2427ENTRY art_quick_indexof
2428#if (STRING_COMPRESSION_FEATURE)
2429    ldr   w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
2430#else
2431    ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
2432#endif
2433    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
2434#if (STRING_COMPRESSION_FEATURE)
2435    /* w4 holds count (with flag) and w3 holds actual length */
2436    lsr   w3, w4, #1
2437#endif
2438    /* Clamp start to [0..count] */
2439    cmp   w2, #0
2440    csel  w2, wzr, w2, lt
2441    cmp   w2, w3
2442    csel  w2, w3, w2, gt
2443
2444    /* Save a copy to compute result */
2445    mov   x5, x0
2446
2447#if (STRING_COMPRESSION_FEATURE)
2448    tbz   w4, #0, .Lstring_indexof_compressed
2449#endif
2450    /* Build pointer to start of data to compare and pre-bias */
2451    add   x0, x0, x2, lsl #1
2452    sub   x0, x0, #2
2453    /* Compute iteration count */
2454    sub   w2, w3, w2
2455
2456    /*
2457     * At this point we have:
2458     *  x0: start of the data to test
2459     *  w1: char to compare
2460     *  w2: iteration count
2461     *  x5: original start of string data
2462     */
2463
2464    subs  w2, w2, #4
2465    b.lt  .Lindexof_remainder
2466
2467.Lindexof_loop4:
2468    ldrh  w6, [x0, #2]!
2469    ldrh  w7, [x0, #2]!
2470    ldrh  wIP0, [x0, #2]!
2471    ldrh  wIP1, [x0, #2]!
2472    cmp   w6, w1
2473    b.eq  .Lmatch_0
2474    cmp   w7, w1
2475    b.eq  .Lmatch_1
2476    cmp   wIP0, w1
2477    b.eq  .Lmatch_2
2478    cmp   wIP1, w1
2479    b.eq  .Lmatch_3
2480    subs  w2, w2, #4
2481    b.ge  .Lindexof_loop4
2482
2483.Lindexof_remainder:
2484    adds  w2, w2, #4
2485    b.eq  .Lindexof_nomatch
2486
2487.Lindexof_loop1:
2488    ldrh  w6, [x0, #2]!
2489    cmp   w6, w1
2490    b.eq  .Lmatch_3
2491    subs  w2, w2, #1
2492    b.ne  .Lindexof_loop1
2493
2494.Lindexof_nomatch:
2495    mov   x0, #-1
2496    ret
2497
2498.Lmatch_0:
2499    sub   x0, x0, #6
2500    sub   x0, x0, x5
2501    asr   x0, x0, #1
2502    ret
2503.Lmatch_1:
2504    sub   x0, x0, #4
2505    sub   x0, x0, x5
2506    asr   x0, x0, #1
2507    ret
2508.Lmatch_2:
2509    sub   x0, x0, #2
2510    sub   x0, x0, x5
2511    asr   x0, x0, #1
2512    ret
2513.Lmatch_3:
2514    sub   x0, x0, x5
2515    asr   x0, x0, #1
2516    ret
2517#if (STRING_COMPRESSION_FEATURE)
2518   /*
2519    * Compare the compressed string character by character with the
2520    * input character.
2521    */
2522.Lstring_indexof_compressed:
2523    add   x0, x0, x2
2524    sub   x0, x0, #1
2525    sub   w2, w3, w2
2526.Lstring_indexof_compressed_loop:
2527    subs  w2, w2, #1
2528    b.lt  .Lindexof_nomatch
2529    ldrb  w6, [x0, #1]!
2530    cmp   w6, w1
2531    b.eq  .Lstring_indexof_compressed_matched
2532    b     .Lstring_indexof_compressed_loop
2533.Lstring_indexof_compressed_matched:
2534    sub   x0, x0, x5
2535    ret
2536#endif
2537END art_quick_indexof
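// Index recovery in art_quick_indexof above: every LDRH uses pre-indexed addressing, so
// after the 4-way unrolled loads x0 points at the last character read. A hit on the first
// of the four characters is therefore 6 bytes back, the second 4, the third 2, the fourth
// 0, and the index is (char_address - string_data_start) / sizeof(uint16_t), computed as
// (x0 - x5) >> 1.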
2538
2539    /*
2540     * Create a function `name` calling the ReadBarrier::Mark routine,
2541     * getting its argument and returning its result through W register
2542     * `wreg` (corresponding to X register `xreg`), saving and restoring
2543     * all caller-save registers.
2544     *
2545     * If `wreg` is different from `w0`, the generated function follows a
2546     * non-standard runtime calling convention:
2547     * - register `wreg` is used to pass the (sole) argument of this
2548     *   function (instead of W0);
2549     * - register `wreg` is used to return the result of this function
2550     *   (instead of W0);
2551     * - W0 is treated like a normal (non-argument) caller-save register;
2552     * - everything else is the same as in the standard runtime calling
2553     *   convention (e.g. standard callee-save registers are preserved).
2554     */
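// Conceptually, each generated stub computes the following (illustrative sketch; the
// helper names are hypothetical):
//   wreg = (wreg == null)                 ? null
//        : IsMarkBitSet(lock_word)        ? wreg                               // fast path
//        : IsForwardingAddress(lock_word) ? DecodeForwardingAddress(lock_word)
//        : artReadBarrierMark(wreg);                                           // slow path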
2555.macro READ_BARRIER_MARK_REG name, wreg, xreg
2556ENTRY \name
2557    // Reference is null, no work to do at all.
2558    cbz \wreg, .Lret_rb_\name
2559    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
2560    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2561    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
2562.Lret_rb_\name:
2563    ret
2564.Lnot_marked_rb_\name:
2565    // Check if the top two bits are set; if so, the lock word holds a forwarding address.
2566    tst   wIP0, wIP0, lsl #1
2567    bmi   .Lret_forwarding_address\name
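    // Note: the TST above computes wIP0 & (wIP0 << 1); bit 31 of that result is
    // (bit 31 & bit 30) of the lock word, so the N flag is set exactly when both
    // state bits are 1, i.e. when the lock word encodes a forwarding address.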
2568.Lslow_rb_\name:
2569    /*
2570     * Allocate 44 stack slots * 8 = 352 bytes:
2571     * - 20 slots for core registers X0-15, X17, X19, LR (X16 and X18 skipped, 1 slot padding)
2572     * - 24 slots for floating-point registers D0-D7 and D16-D31
2573     */
2574    // We must not clobber IP1 since code emitted for HLoadClass and HLoadString
2575    // relies on IP1 being preserved.
2576    // Save all potentially live caller-save core registers.
2577    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
2578    SAVE_TWO_REGS  x2,  x3, 16
2579    SAVE_TWO_REGS  x4,  x5, 32
2580    SAVE_TWO_REGS  x6,  x7, 48
2581    SAVE_TWO_REGS  x8,  x9, 64
2582    SAVE_TWO_REGS x10, x11, 80
2583    SAVE_TWO_REGS x12, x13, 96
2584    SAVE_TWO_REGS x14, x15, 112
2585    SAVE_REG x17, 128  // Skip x16, i.e. IP0.
2586    SAVE_TWO_REGS x19, xLR, 144  // Save also return address.
2587    // Save all potentially live caller-save floating-point registers.
2588    stp   d0, d1,   [sp, #160]
2589    stp   d2, d3,   [sp, #176]
2590    stp   d4, d5,   [sp, #192]
2591    stp   d6, d7,   [sp, #208]
2592    stp   d16, d17, [sp, #224]
2593    stp   d18, d19, [sp, #240]
2594    stp   d20, d21, [sp, #256]
2595    stp   d22, d23, [sp, #272]
2596    stp   d24, d25, [sp, #288]
2597    stp   d26, d27, [sp, #304]
2598    stp   d28, d29, [sp, #320]
2599    stp   d30, d31, [sp, #336]
2600
2601    .ifnc \wreg, w0
2602      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
2603    .endif
2604    bl    artReadBarrierMark            // artReadBarrierMark(obj)
2605    .ifnc \wreg, w0
2606      mov   \wreg, w0                   // Return result into `wreg`
2607    .endif
2608
2609    // Restore core regs, except `xreg`, as `wreg` is used to return the
2610    // result of this function (simply remove it from the stack instead).
2611    POP_REGS_NE x0, x1,   0,   \xreg
2612    POP_REGS_NE x2, x3,   16,  \xreg
2613    POP_REGS_NE x4, x5,   32,  \xreg
2614    POP_REGS_NE x6, x7,   48,  \xreg
2615    POP_REGS_NE x8, x9,   64,  \xreg
2616    POP_REGS_NE x10, x11, 80,  \xreg
2617    POP_REGS_NE x12, x13, 96,  \xreg
2618    POP_REGS_NE x14, x15, 112, \xreg
2619    POP_REG_NE x17, 128, \xreg
2620    POP_REGS_NE x19, xLR, 144, \xreg  // Restore also return address.
2621    // Restore floating-point registers.
2622    ldp   d0, d1,   [sp, #160]
2623    ldp   d2, d3,   [sp, #176]
2624    ldp   d4, d5,   [sp, #192]
2625    ldp   d6, d7,   [sp, #208]
2626    ldp   d16, d17, [sp, #224]
2627    ldp   d18, d19, [sp, #240]
2628    ldp   d20, d21, [sp, #256]
2629    ldp   d22, d23, [sp, #272]
2630    ldp   d24, d25, [sp, #288]
2631    ldp   d26, d27, [sp, #304]
2632    ldp   d28, d29, [sp, #320]
2633    ldp   d30, d31, [sp, #336]
2634    // Remove frame and return.
2635    DECREASE_FRAME 352
2636    ret
2637.Lret_forwarding_address\name:
2638    // Shift left by the forwarding address shift. This clears out the state bits since they are
2639    // in the top 2 bits of the lock word.
2640    lsl   \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
2641    ret
2642END \name
2643.endm
2644
2645READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
2646READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
2647READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
2648READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
2649READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
2650READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
2651READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
2652READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
2653READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
2654READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
2655READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
2656READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
2657READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
2658READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
2659READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
2660READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
2661// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 -- not generated, IP0 is blocked.
2662READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
2663READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
2664READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
2665READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
2666READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
2667READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
2668READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
2669READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
2670READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
2671READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
2672READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
2673READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
2674READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
2675
2676
2677.macro SELECT_X_OR_W_FOR_MACRO macro_to_use, x, w, xreg
2678    .if \xreg
2679      \macro_to_use \x
2680    .else
2681      \macro_to_use \w
2682    .endif
2683.endm

.macro FOR_REGISTERS macro_for_register, macro_for_reserved_register, xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x0, w0, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x1, w1, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x2, w2, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x3, w3, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x4, w4, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x5, w5, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x6, w6, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x7, w7, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x8, w8, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x9, w9, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x10, w10, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x11, w11, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x12, w12, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x13, w13, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x14, w14, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x15, w15, \xreg
    \macro_for_reserved_register  // IP0 is reserved
    \macro_for_reserved_register  // IP1 is reserved
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x18, w18, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x19, w19, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x20, w20, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x21, w21, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x22, w22, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x23, w23, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x24, w24, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x25, w25, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x26, w26, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x27, w27, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x28, w28, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x29, w29, \xreg
    \macro_for_reserved_register  // lr is reserved
    \macro_for_reserved_register  // sp is reserved
.endm

.macro FOR_XREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 1
.endm

.macro FOR_WREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 0
.endm
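// Sizing note (illustration only): FOR_REGISTERS emits exactly 32 cases - x0-x15 (16),
// IP0 and IP1 (2, reserved), x18-x29 (12), LR and SP (2, reserved). Each case body used
// below (INTROSPECTION_ARRAY_LOAD, MOV_WIP0_TO_WREG_AND_BL_LR, or BRK0_BRK0) is two 4-byte
// instructions, so every generated switch table is 32 * 8 = 256 bytes, matching the
// alignment and offset layout described before art_quick_read_barrier_mark_introspection.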

.macro BRK0_BRK0
    brk 0
    brk 0
.endm

#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
.macro INTROSPECTION_ARRAY_LOAD index_reg
    ldr   wIP0, [xIP0, \index_reg, lsl #2]
    b     art_quick_read_barrier_mark_introspection
.endm
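// Example case (illustration only): the array thunk moves the base register to xIP0 and jumps
// to the switch case matching the index register, so the case for index register x5 is simply
//   ldr   wIP0, [xIP0, x5, lsl #2]  // Scale by 4: references are compressed to 32 bits.
//   b     art_quick_read_barrier_mark_introspection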

.macro MOV_WIP0_TO_WREG_AND_BL_LR reg
    mov   \reg, wIP0
    br    lr  // Do not use RET as we do not enter the entrypoint with "BL".
.endm

.macro READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH ldr_offset
    /*
     * Allocate 44 stack slots * 8 = 352 bytes:
     * - 19 slots for core registers X0-15, X18-X19, LR
     * - 1 slot padding
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
    SAVE_TWO_REGS  x2,  x3, 16
    SAVE_TWO_REGS  x4,  x5, 32
    SAVE_TWO_REGS  x6,  x7, 48
    SAVE_TWO_REGS  x8,  x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    SAVE_TWO_REGS x18, x19, 128       // Skip x16, x17, i.e. IP0, IP1.
    SAVE_REG      xLR,      144       // Save return address; padding slot at 152.
    // Save all potentially live caller-save floating-point registers.
    stp   d0, d1,   [sp, #160]
    stp   d2, d3,   [sp, #176]
    stp   d4, d5,   [sp, #192]
    stp   d6, d7,   [sp, #208]
    stp   d16, d17, [sp, #224]
    stp   d18, d19, [sp, #240]
    stp   d20, d21, [sp, #256]
    stp   d22, d23, [sp, #272]
    stp   d24, d25, [sp, #288]
    stp   d26, d27, [sp, #304]
    stp   d28, d29, [sp, #320]
    stp   d30, d31, [sp, #336]

    mov   x0, xIP0
    bl    artReadBarrierMark          // artReadBarrierMark(obj)
    mov   xIP0, x0

    // Restore core regs, except x0 and x1 as the return register switch case
    // address calculation is smoother with an extra register.
    RESTORE_TWO_REGS  x2,  x3, 16
    RESTORE_TWO_REGS  x4,  x5, 32
    RESTORE_TWO_REGS  x6,  x7, 48
    RESTORE_TWO_REGS  x8,  x9, 64
    RESTORE_TWO_REGS x10, x11, 80
    RESTORE_TWO_REGS x12, x13, 96
    RESTORE_TWO_REGS x14, x15, 112
    RESTORE_TWO_REGS x18, x19, 128    // Skip x16, x17, i.e. IP0, IP1.
    RESTORE_REG      xLR,      144    // Restore return address.
    // Restore caller-save floating-point registers.
    ldp   d0, d1,   [sp, #160]
    ldp   d2, d3,   [sp, #176]
    ldp   d4, d5,   [sp, #192]
    ldp   d6, d7,   [sp, #208]
    ldp   d16, d17, [sp, #224]
    ldp   d18, d19, [sp, #240]
    ldp   d20, d21, [sp, #256]
    ldp   d22, d23, [sp, #272]
    ldp   d24, d25, [sp, #288]
    ldp   d26, d27, [sp, #304]
    ldp   d28, d29, [sp, #320]
    ldp   d30, d31, [sp, #336]

    ldr   x0, [lr, #\ldr_offset]      // Load the instruction.
    adr   xIP1, .Lmark_introspection_return_switch
    bfi   xIP1, x0, #3, #5            // Calculate switch case address.
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 352
    br    xIP1
.endm
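// Frame and dispatch check (illustration only): 19 core slots (152 bytes) + 1 padding slot (8)
// + 24 FP slots (192) = 352 bytes, matching the frame size above. The dispatch relies on the
// A64 LDR encoding keeping the destination register number in bits 0-4 (the Rt field): since
// .Lmark_introspection_return_switch is 256-byte aligned, "bfi xIP1, x0, #3, #5" replaces
// bits 3-7 of the switch base with Rt, i.e. computes base + Rt * 8. For a hypothetical
// "ldr w3, [x0, #8]" at the return address, Rt = 3 and the BR lands on the case for w3.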

    /*
     * Use introspection to load a reference from the same address as the LDR
     * instruction in generated code would load (unless loaded by the thunk,
     * see below), call ReadBarrier::Mark() with that reference if needed
     * and return it in the same register as the LDR instruction would load.
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads the LDR instruction in generated code follows
     * the branch to the thunk, i.e. the LDR is at [LR, #-4], and the thunk
     * knows the holder and performs the gray bit check, returning to the LDR
     * instruction if the object is not gray, so this entrypoint no longer
     * needs to know anything about the holder. For GC root loads, the LDR
     * instruction in generated code precedes the branch to the thunk (i.e.
     * the LDR is at [LR, #-8]) and the thunk does not do the gray bit check.
     *
     * For field accesses and array loads with a constant index the thunk loads
     * the reference into IP0 using introspection and calls the main entrypoint,
     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
     * the passed reference is poisoned.
     *
     * For array accesses with non-constant index, the thunk inserts the bits
     * 16-21 of the LDR instruction to the entrypoint address, effectively
     * calculating a switch case label based on the index register (bits 16-20)
     * and adding an extra offset (bit 21 is set) to differentiate from the
     * main entrypoint, then moves the base register to IP0 and jumps to the
     * switch case. Therefore we need to align the main entrypoint to 512 bytes,
     * accounting for a 256-byte offset followed by 32 array entrypoints
     * starting at art_quick_read_barrier_mark_introspection_arrays, each
     * containing an LDR (register) and a branch to the main entrypoint.
     *
     * For GC root accesses we cannot use the main entrypoint because of the
     * different offset where the LDR instruction in generated code is located.
     * (And even with heap poisoning enabled, GC roots are not poisoned.)
     * To re-use the same entrypoint pointer in generated code, we make sure
     * that the gc root entrypoint (a copy of the entrypoint with a different
     * offset for introspection loads) is located at a known offset (768 bytes,
     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
     * the root register to IP0 and jumps to the customized entrypoint,
     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
     * performs all the fast-path checks, so we need just the slow path.
     *
     * The code structure is
     *   art_quick_read_barrier_mark_introspection:
     *     Up to 256 bytes for the main entrypoint code.
     *     Padding to 256 bytes if needed.
     *   art_quick_read_barrier_mark_introspection_arrays:
     *     Exactly 256 bytes for array load switch cases (32x2 instructions).
     *   .Lmark_introspection_return_switch:
     *     Exactly 256 bytes for return switch cases (32x2 instructions).
     *   art_quick_read_barrier_mark_introspection_gc_roots:
     *     GC root entrypoint code.
     */
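    /*
     * Offset arithmetic for the layout above (illustration only): 256 bytes of main
     * entrypoint code + 256 bytes of array switch cases + 256 bytes of return switch
     * cases place the GC root entrypoint at 3 * 256 = 768 bytes, the value that
     * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET is expected to encode.
     */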
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP0 contains the reference, IP1 can be freely used.
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF wIP0
    // If reference is null, just return it in the right register.
    cbz   wIP0, .Lmark_introspection_return
    // Use wIP1 as temp and check the mark bit of the reference.
    ldr   wIP1, [xIP0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz   wIP1, #LOCK_WORD_MARK_BIT_SHIFT, .Lmark_introspection_unmarked
.Lmark_introspection_return:
    // Without an extra register for the return switch case address calculation,
    // we exploit the high word of xIP0 to temporarily store the ref_reg*8,
    // so the return switch below must move wIP0 instead of xIP0 to the register.
    ldr   wIP1, [lr, #BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET]  // Load the instruction.
    bfi   xIP0, xIP1, #(32 + 3), #5   // Extract ref_reg*8 to high word in xIP0.
    adr   xIP1, .Lmark_introspection_return_switch
    bfxil xIP1, xIP0, #32, #8         // Calculate return switch case address.
    br    xIP1
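    // Bit-twiddling example (illustration only): suppose the LDR at the return address is
    // "ldr w7, [x21, #12]", so its Rt field (bits 0-4) is 7. The BFI above writes 7 into
    // bits 35-39 of xIP0; the reference occupies only the low word (wIP0), so bits 32-34
    // stay zero and the high word's low byte reads 7 * 8 = 56. The BFXIL then copies that
    // byte into the low 8 bits of the 256-byte-aligned switch base, selecting the case
    // that executes "mov w7, wIP0; br lr".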
.Lmark_introspection_unmarked:
    // Check if the top two bits are set; if so, this is a forwarding address.
    tst   wIP1, wIP1, lsl #1
    b.mi  .Lmark_introspection_forwarding_address
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET

.Lmark_introspection_forwarding_address:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl   wIP0, wIP1, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b     .Lmark_introspection_return
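    // Sign-bit trick (illustration only): "tst wIP1, wIP1, lsl #1" computes
    // wIP1 & (wIP1 << 1), whose bit 31 is set exactly when bits 31 and 30 of the lock
    // word are both set, i.e. in the forwarding-address state. A single B.MI thus
    // replaces two separate bit tests.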

    // We're very close to the allotted 256B for the entrypoint code before the
    // array switch cases. Should we go a little bit over the limit, we can
    // move some code after the array switch cases and return switch cases.
    .balign 256
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    FOR_XREGISTERS INTROSPECTION_ARRAY_LOAD, BRK0_BRK0
.Lmark_introspection_return_switch:
    FOR_WREGISTERS MOV_WIP0_TO_WREG_AND_BL_LR, BRK0_BRK0
    .hidden art_quick_read_barrier_mark_introspection_gc_roots
    .global art_quick_read_barrier_mark_introspection_gc_roots
art_quick_read_barrier_mark_introspection_gc_roots:
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
END art_quick_read_barrier_mark_introspection

.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME                // Save callee saves in case allocation triggers GC.
    mov     x2, xSELF
    mov     x3, sp
    INCREASE_FRAME 16                             // Reserve space for JValue result.
    str     xzr, [sp, #0]                         // Initialize result to zero.
    mov     x0, sp                                // Set x0 to point to result.
    bl      artInvokePolymorphic                  // artInvokePolymorphic(result, receiver, thread, save_area)
    uxtb    w0, w0                                // Result is the return type descriptor as a char.
    sub     w0, w0, 'A'                           // Convert to zero-based index.
    cmp     w0, 'Z' - 'A'
    b.hi    .Lcleanup_and_return                  // Clean up if out of bounds.
    adrp    x1, .Lhandler_table                   // Compute address of handler table.
    add     x1, x1, :lo12:.Lhandler_table
    ldrb    w0, [x1, w0, uxtw]                    // Look up handler offset in handler table.
    adr     x1, .Lstart_of_handlers
    add     x0, x1, w0, sxtb #2                   // Convert relative offset to absolute address.
    br      x0                                    // Branch to handler.
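    // Dispatch example (illustration only): for a handle returning double, the descriptor
    // char is 'D', so w0 = 'D' - 'A' = 3. Entry 3 of .Lhandler_table holds
    // (.Lstore_double_result - .Lstart_of_handlers) / 4 as a signed byte, and "sxtb #2"
    // scales it back to a byte offset, so the BR above lands on .Lstore_double_result.
    // Storing offsets in 4-byte instruction units keeps every handler within the
    // +/-127-word reach of a signed byte.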

.Lstart_of_handlers:
.Lstore_boolean_result:
    ldrb    w0, [sp]
    b       .Lcleanup_and_return
.Lstore_char_result:
    ldrh    w0, [sp]
    b       .Lcleanup_and_return
.Lstore_float_result:
    ldr     s0, [sp]
    str     s0, [sp, #32]
    b       .Lcleanup_and_return
.Lstore_double_result:
    ldr     d0, [sp]
    str     d0, [sp, #32]
    b       .Lcleanup_and_return
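    // Why [sp, #32] (illustration only, assuming SETUP_SAVE_REFS_AND_ARGS_FRAME spills
    // d0 at offset 16 within its frame): the 16-byte JValue scratch area sits below the
    // frame, so that spill slot is at sp + 32 here. Overwriting the saved d0 makes
    // RESTORE_SAVE_REFS_AND_ARGS_FRAME reload the FP result into d0, the FP return
    // register, while integer results are simply left in x0.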
.Lstore_long_result:
    ldr     x0, [sp]
    // Fall-through
.Lcleanup_and_return:
    DECREASE_FRAME 16
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

    .section    .rodata                           // Place handler table in read-only section away from text.
    .align  2
.macro HANDLER_TABLE_OFFSET handler_label
    .byte (\handler_label - .Lstart_of_handlers) / 4
.endm
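// Example entry (illustration): .Lstore_char_result lies two instructions (8 bytes) past
// .Lstart_of_handlers, so HANDLER_TABLE_OFFSET(.Lstore_char_result) emits ".byte 2".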
.Lhandler_table:
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // L (object - references are compressed and only 32-bits)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
    .text

END art_quick_invoke_polymorphic