quick_entrypoints_arm64.S revision fd36f1f927c138575184a1f4c7ea4e7abb3e2dbf
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm64.S"

#include "arch/quick_alloc_entrypoints.S"


    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]

    sub sp, sp, #176
    .cfi_adjust_cfa_offset 176

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP callee-saves.
    stp d8, d9,   [sp, #16]
    stp d10, d11, [sp, #32]
    stp d12, d13, [sp, #48]
    stp d14, d15, [sp, #64]

    // GP callee-saves
    stp x19, x20, [sp, #80]
    .cfi_rel_offset x19, 80
    .cfi_rel_offset x20, 88

    stp x21, x22, [sp, #96]
    .cfi_rel_offset x21, 96
    .cfi_rel_offset x22, 104

    stp x23, x24, [sp, #112]
    .cfi_rel_offset x23, 112
    .cfi_rel_offset x24, 120

    stp x25, x26, [sp, #128]
    .cfi_rel_offset x25, 128
    .cfi_rel_offset x26, 136

    stp x27, x28, [sp, #144]
    .cfi_rel_offset x27, 144
    .cfi_rel_offset x28, 152

    stp x29, xLR, [sp, #160]
    .cfi_rel_offset x29, 160
    .cfi_rel_offset x30, 168

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
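
    /*
     * Resulting frame layout (a sketch; offsets follow the stores above):
     *   [sp, #160] x29, [sp, #168] LR
     *   [sp, #80]  x19..x28 (pairs, up to #152)
     *   [sp, #16]  d8..d15 (pairs, up to #72)
     *   [sp, #8]   stack alignment filler
     *   [sp, #0]   ArtMethod* for kSaveAllCalleeSaves
     */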

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]

    sub sp, sp, #96
    .cfi_adjust_cfa_offset 96

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
#endif

    // GP callee-saves.
    // x20 paired with ArtMethod* - see below.
    stp x21, x22, [sp, #16]
    .cfi_rel_offset x21, 16
    .cfi_rel_offset x22, 24

    stp x23, x24, [sp, #32]
    .cfi_rel_offset x23, 32
    .cfi_rel_offset x24, 40

    stp x25, x26, [sp, #48]
    .cfi_rel_offset x25, 48
    .cfi_rel_offset x26, 56

    stp x27, x28, [sp, #64]
    .cfi_rel_offset x27, 64
    .cfi_rel_offset x28, 72

    stp x29, xLR, [sp, #80]
    .cfi_rel_offset x29, 80
    .cfi_rel_offset x30, 88

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
    stp xIP0, x20, [sp]
    .cfi_rel_offset x20, 8

    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_ONLY_FRAME
    // Callee-saves.
    ldr x20, [sp, #8]
    .cfi_restore x20

    ldp x21, x22, [sp, #16]
    .cfi_restore x21
    .cfi_restore x22

    ldp x23, x24, [sp, #32]
    .cfi_restore x23
    .cfi_restore x24

    ldp x25, x26, [sp, #48]
    .cfi_restore x25
    .cfi_restore x26

    ldp x27, x28, [sp, #64]
    .cfi_restore x27
    .cfi_restore x28

    ldp x29, xLR, [sp, #80]
    .cfi_restore x29
    .cfi_restore x30

    add sp, sp, #96
    .cfi_adjust_cfa_offset -96
.endm

.macro POP_SAVE_REFS_ONLY_FRAME
    add sp, sp, #96
    .cfi_adjust_cfa_offset -96
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
    RESTORE_SAVE_REFS_ONLY_FRAME
    ret
.endm


.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    sub sp, sp, #224
    .cfi_adjust_cfa_offset 224

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP args.
    stp d0, d1, [sp, #16]
    stp d2, d3, [sp, #32]
    stp d4, d5, [sp, #48]
    stp d6, d7, [sp, #64]

    // Core args.
    stp x1, x2, [sp, #80]
    .cfi_rel_offset x1, 80
    .cfi_rel_offset x2, 88

    stp x3, x4, [sp, #96]
    .cfi_rel_offset x3, 96
    .cfi_rel_offset x4, 104

    stp x5, x6, [sp, #112]
    .cfi_rel_offset x5, 112
    .cfi_rel_offset x6, 120

    // x7, Callee-saves.
    stp x7, x20, [sp, #128]
    .cfi_rel_offset x7, 128
    .cfi_rel_offset x20, 136

    stp x21, x22, [sp, #144]
    .cfi_rel_offset x21, 144
    .cfi_rel_offset x22, 152

    stp x23, x24, [sp, #160]
    .cfi_rel_offset x23, 160
    .cfi_rel_offset x24, 168

    stp x25, x26, [sp, #176]
    .cfi_rel_offset x25, 176
    .cfi_rel_offset x26, 184

    stp x27, x28, [sp, #192]
    .cfi_rel_offset x27, 192
    .cfi_rel_offset x28, 200

    // x29(callee-save) and LR.
    stp x29, xLR, [sp, #208]
    .cfi_rel_offset x29, 208
    .cfi_rel_offset x30, 216

.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     *
     * TODO This is probably too conservative - saving FP & LR.
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]

    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL

    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    // FP args.
    ldp d0, d1, [sp, #16]
    ldp d2, d3, [sp, #32]
    ldp d4, d5, [sp, #48]
    ldp d6, d7, [sp, #64]

    // Core args.
    ldp x1, x2, [sp, #80]
    .cfi_restore x1
    .cfi_restore x2

    ldp x3, x4, [sp, #96]
    .cfi_restore x3
    .cfi_restore x4

    ldp x5, x6, [sp, #112]
    .cfi_restore x5
    .cfi_restore x6

    // x7, Callee-saves.
    ldp x7, x20, [sp, #128]
    .cfi_restore x7
    .cfi_restore x20

    ldp x21, x22, [sp, #144]
    .cfi_restore x21
    .cfi_restore x22

    ldp x23, x24, [sp, #160]
    .cfi_restore x23
    .cfi_restore x24

    ldp x25, x26, [sp, #176]
    .cfi_restore x25
    .cfi_restore x26

    ldp x27, x28, [sp, #192]
    .cfi_restore x27
    .cfi_restore x28

    // x29(callee-save) and LR.
    ldp x29, xLR, [sp, #208]
    .cfi_restore x29
    .cfi_restore x30

    add sp, sp, #224
    .cfi_adjust_cfa_offset -224
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME
    sub sp, sp, #512
    .cfi_adjust_cfa_offset 512

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif

    // Save FP registers.
    stp d0, d1,   [sp, #8]
    stp d2, d3,   [sp, #24]
    stp d4, d5,   [sp, #40]
    stp d6, d7,   [sp, #56]
    stp d8, d9,   [sp, #72]
    stp d10, d11, [sp, #88]
    stp d12, d13, [sp, #104]
    stp d14, d15, [sp, #120]
    stp d16, d17, [sp, #136]
    stp d18, d19, [sp, #152]
    stp d20, d21, [sp, #168]
    stp d22, d23, [sp, #184]
    stp d24, d25, [sp, #200]
    stp d26, d27, [sp, #216]
    stp d28, d29, [sp, #232]
    stp d30, d31, [sp, #248]

    // Save core registers.
    str x0,       [sp, #264]
    .cfi_rel_offset x0, 264

    stp x1, x2,   [sp, #272]
    .cfi_rel_offset x1, 272
    .cfi_rel_offset x2, 280

    stp x3, x4,   [sp, #288]
    .cfi_rel_offset x3, 288
    .cfi_rel_offset x4, 296

    stp x5, x6,   [sp, #304]
    .cfi_rel_offset x5, 304
    .cfi_rel_offset x6, 312

    stp x7, x8,   [sp, #320]
    .cfi_rel_offset x7, 320
    .cfi_rel_offset x8, 328

    stp x9, x10,  [sp, #336]
    .cfi_rel_offset x9, 336
    .cfi_rel_offset x10, 344

    stp x11, x12, [sp, #352]
    .cfi_rel_offset x11, 352
    .cfi_rel_offset x12, 360

    stp x13, x14, [sp, #368]
    .cfi_rel_offset x13, 368
    .cfi_rel_offset x14, 376

    stp x15, x16, [sp, #384]
    .cfi_rel_offset x15, 384
    .cfi_rel_offset x16, 392

    stp x17, x18, [sp, #400]
    .cfi_rel_offset x17, 400
    .cfi_rel_offset x18, 408

    stp x19, x20, [sp, #416]
    .cfi_rel_offset x19, 416
    .cfi_rel_offset x20, 424

    stp x21, x22, [sp, #432]
    .cfi_rel_offset x21, 432
    .cfi_rel_offset x22, 440

    stp x23, x24, [sp, #448]
    .cfi_rel_offset x23, 448
    .cfi_rel_offset x24, 456

    stp x25, x26, [sp, #464]
    .cfi_rel_offset x25, 464
    .cfi_rel_offset x26, 472

    stp x27, x28, [sp, #480]
    .cfi_rel_offset x27, 480
    .cfi_rel_offset x28, 488

    stp x29, xLR, [sp, #496]
    .cfi_rel_offset x29, 496
    .cfi_rel_offset x30, 504

    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
    ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
430
431.macro RESTORE_SAVE_EVERYTHING_FRAME
432    // Restore FP registers.
433    ldp d0, d1,   [sp, #8]
434    ldp d2, d3,   [sp, #24]
435    ldp d4, d5,   [sp, #40]
436    ldp d6, d7,   [sp, #56]
437    ldp d8, d9,   [sp, #72]
438    ldp d10, d11, [sp, #88]
439    ldp d12, d13, [sp, #104]
440    ldp d14, d15, [sp, #120]
441    ldp d16, d17, [sp, #136]
442    ldp d18, d19, [sp, #152]
443    ldp d20, d21, [sp, #168]
444    ldp d22, d23, [sp, #184]
445    ldp d24, d25, [sp, #200]
446    ldp d26, d27, [sp, #216]
447    ldp d28, d29, [sp, #232]
448    ldp d30, d31, [sp, #248]
449
450    // Restore core registers.
451    ldr x0,       [sp, #264]
452    .cfi_restore x0
453
454    ldp x1, x2,   [sp, #272]
455    .cfi_restore x1
456    .cfi_restore x2
457
458    ldp x3, x4,   [sp, #288]
459    .cfi_restore x3
460    .cfi_restore x4
461
462    ldp x5, x6,   [sp, #304]
463    .cfi_restore x5
464    .cfi_restore x6
465
466    ldp x7, x8,   [sp, #320]
467    .cfi_restore x7
468    .cfi_restore x8
469
470    ldp x9, x10,  [sp, #336]
471    .cfi_restore x9
472    .cfi_restore x10
473
474    ldp x11, x12, [sp, #352]
475    .cfi_restore x11
476    .cfi_restore x12
477
478    ldp x13, x14, [sp, #368]
479    .cfi_restore x13
480    .cfi_restore x14
481
482    ldp x15, x16, [sp, #384]
483    .cfi_restore x15
484    .cfi_restore x16
485
486    ldp x17, x18, [sp, #400]
487    .cfi_restore x17
488    .cfi_restore x18
489
490    ldp x19, x20, [sp, #416]
491    .cfi_restore x19
492    .cfi_restore x20
493
494    ldp x21, x22, [sp, #432]
495    .cfi_restore x21
496    .cfi_restore x22
497
498    ldp x23, x24, [sp, #448]
499    .cfi_restore x23
500    .cfi_restore x24
501
502    ldp x25, x26, [sp, #464]
503    .cfi_restore x25
504    .cfi_restore x26
505
506    ldp x27, x28, [sp, #480]
507    .cfi_restore x27
508    .cfi_restore x28
509
510    ldp x29, xLR, [sp, #496]
511    .cfi_restore x29
512    .cfi_restore x30
513
514    add sp, sp, #512
515    .cfi_adjust_cfa_offset -512
516.endm
517
518.macro RETURN_IF_RESULT_IS_ZERO
519    cbnz x0, 1f                // result non-zero branch over
520    ret                        // return
5211:
522.endm
523
524.macro RETURN_IF_RESULT_IS_NON_ZERO
525    cbz x0, 1f                 // result zero branch over
526    ret                        // return
5271:
528.endm
529
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mov x0, xSELF

    // Point of no return.
    b artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
    brk 0  // Unreached
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    ret
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
.endm

// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
.endm

.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
    cbnz w0, 1f                // result non-zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    b   \cxx_name                     // \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
    mov x1, xSELF                     // pass Thread::Current.
    b   \cxx_name                     // \cxx_name(arg, Thread*).
    brk 0
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
    brk 0
END \c_name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code to create and deliver a NoSuchMethodError.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/x1.
     *
     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
     * of the target Method* in x0 and method->code_ in x1.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Adapted from ARM32 code.
     *
     * Clobbers xIP0.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *self, sp)

    mov    x2, xSELF                      // pass Thread::Current
    mov    x3, sp
    bl     \cxx_name                      // (method_idx, this, Thread*, SP)
    mov    xIP0, x1                       // save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
    br     xIP0                           // tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


.macro INVOKE_STUB_CREATE_FRAME

SAVE_SIZE=15*8   // x4, x5, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
SAVE_SIZE_AND_METHOD=SAVE_SIZE+8


    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    add x10, x2, # SAVE_SIZE_AND_METHOD    // calculate size of frame.
    sub x10, sp, x10                       // Calculate SP position - saves + ArtMethod* + args
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    sub x10, x9, #SAVE_SIZE                // Calculate new FP (later). Done here as we must move SP
    .cfi_def_cfa_register x10              // before this.
    .cfi_adjust_cfa_offset SAVE_SIZE

    str x28, [x10, #112]
    .cfi_rel_offset x28, 112

    stp x26, x27, [x10, #96]
    .cfi_rel_offset x26, 96
    .cfi_rel_offset x27, 104

    stp x24, x25, [x10, #80]
    .cfi_rel_offset x24, 80
    .cfi_rel_offset x25, 88

    stp x22, x23, [x10, #64]
    .cfi_rel_offset x22, 64
    .cfi_rel_offset x23, 72

    stp x20, x21, [x10, #48]
    .cfi_rel_offset x20, 48
    .cfi_rel_offset x21, 56

    stp x9, x19, [x10, #32]                // Save old stack pointer and x19.
    .cfi_rel_offset sp, 32
    .cfi_rel_offset x19, 40

    stp x4, x5, [x10, #16]                 // Save result and shorty addresses.
    .cfi_rel_offset x4, 16
    .cfi_rel_offset x5, 24

    stp xFP, xLR, [x10]                    // Store LR & FP.
    .cfi_rel_offset x29, 0
    .cfi_rel_offset x30, 8

    mov xFP, x10                           // Use xFP now, as it's callee-saved.
    .cfi_def_cfa_register x29
    mov xSELF, x3                          // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                         // Destination address is bottom of stack + null.

    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
1:
    cmp w2, #0
    beq 2f
    sub w2, w2, #4      // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]

    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm

.macro INVOKE_STUB_CALL_AND_RETURN

    // load method-> METHOD_QUICK_CODE_OFFSET
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
    blr x9

    // Restore return value address and shorty address.
    ldp x4,x5, [xFP, #16]
    .cfi_restore x4
    .cfi_restore x5

    ldr x28, [xFP, #112]
    .cfi_restore x28

    ldp x26, x27, [xFP, #96]
    .cfi_restore x26
    .cfi_restore x27

    ldp x24, x25, [xFP, #80]
    .cfi_restore x24
    .cfi_restore x25

    ldp x22, x23, [xFP, #64]
    .cfi_restore x22
    .cfi_restore x23

    ldp x20, x21, [xFP, #48]
    .cfi_restore x20
    .cfi_restore x21

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x5]

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq 3f

    // Is it a double?
    cmp w10, #'D'
    bne 1f
    str d0, [x4]
    b 3f

1:  // Is it a float?
    cmp w10, #'F'
    bne 2f
    str s0, [x4]
    b 3f

2:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

3:  // Finish up.
    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
    .cfi_restore x19
    mov sp, x2
    .cfi_restore sp

    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
    .cfi_restore x29
    .cfi_restore x30

    ret

.endm


/*
 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 *  +----------------------+
 *  |                      |
 *  |  C/C++ frame         |
 *  |       LR''           |
 *  |       FP''           | <- SP'
 *  +----------------------+
 *  +----------------------+
 *  |        x28           | <- TODO: Remove callee-saves.
 *  |         :            |
 *  |        x19           |
 *  |        SP'           |
 *  |        X5            |
 *  |        X4            |        Saved registers
 *  |        LR'           |
 *  |        FP'           | <- FP
 *  +----------------------+
 *  | uint32_t out[n-1]    |
 *  |    :      :          |        Outs
 *  | uint32_t out[0]      |
 *  | ArtMethod*           | <- SP  value=null
 *  +----------------------+
 *
 * Outgoing registers:
 *  x0    - Method*
 *  x1-x7 - integer parameters.
 *  d0-d7 - Floating point parameters.
 *  xSELF = self
 *  SP = & of ArtMethod*
 *  x1 = "this" pointer.
 *
 */
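/*
 * A note on shorties (a summary of ART's shorty encoding, not original to this
 * file): the first character is the return type, each following character one
 * parameter - 'J' long, 'F' float, 'D' double, 'L' any reference, 'V' void
 * return, other letters int-like. E.g. double f(int, long) has the shorty
 * "DIJ". The fill loop below skips the return character and dispatches on each
 * parameter character.
 */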
ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW2
    adr  x12, .LstoreX2
    adr  x13, .LstoreS0
    adr  x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1         // Load shorty address, plus one to skip return value.
    ldr w1, [x9],#4         // Load "this" parameter, and increment arg pointer.

    // Loop to fill registers.
.LfillRegisters:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F' // is this a float?
    bne .LisDouble

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble:
    cmp w17, #'D'           // is this a double?
    bne .LisLong

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'           // is this a long?
    bne .LisOther

    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther:                  // Everything else takes one vReg.
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4:
    add x9, x9, #4
    b .LfillRegisters

.Ladvance8:
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
.macro LOADREG counter size register return
    ldr \register , [x9], #\size
    add \counter, \counter, 12
    b \return
.endm
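
// A note on the #12 stride: each LOADREG expansion is exactly three A64
// instructions (ldr, add, b), i.e. 12 bytes, so x8/x15 double as byte offsets
// into the tables below and "add x17, <table>, <counter>; br x17" in the fill
// loop dispatches straight to the loader for the next free register.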

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub

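// The static variant below is structurally identical to art_quick_invoke_stub;
// since there is no implicit "this", integer arguments start at x1/w1 (seven
// slots, hence the 7*12 "registers full" checks) instead of x2/w2.
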
/*  extern"C"
 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW1_2
    adr  x12, .LstoreX1_2
    adr  x13, .LstoreS0_2
    adr  x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1     // Load shorty address, plus one to skip return value.

    // Loop to fill registers.
.LfillRegisters2:
    ldrb w17, [x10], #1         // Load next character in signature, and increment.
    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F'          // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble2:
    cmp w17, #'D'           // is this a double?
    bne .LisLong2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'           // is this a long?
    bne .LisOther2

    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther2:                 // Everything else takes one vReg.
    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub



/*  extern"C" void art_quick_osr_stub(void** stack,                x0
 *                                    size_t stack_size_in_bytes,  x1
 *                                    const uint8_t* native_pc,    x2
 *                                    JValue *result,              x3
 *                                    char   *shorty,              x4
 *                                    Thread *self)                x5
 */
ENTRY art_quick_osr_stub
SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    sub x10, sp, # SAVE_SIZE
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    str x28, [sp, #112]
    stp x26, x27, [sp, #96]
    stp x24, x25, [sp, #80]
    stp x22, x23, [sp, #64]
    stp x20, x21, [sp, #48]
    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
    stp xFP, xLR, [sp]                    // Store LR & FP.
    mov xSELF, x5                         // Move thread pointer into SELF register.

    sub sp, sp, #16
    str xzr, [sp]                         // Store null for ArtMethod* slot
    // Branch to stub.
    bl .Losr_entry
    add sp, sp, #16

    // Restore return value address and shorty address.
    ldp x3,x4, [sp, #16]
    ldr x28, [sp, #112]
    ldp x26, x27, [sp, #96]
    ldp x24, x25, [sp, #80]
    ldp x22, x23, [sp, #64]
    ldp x20, x21, [sp, #48]

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x4]

    // Check the return type and store the correct register into the jvalue in memory.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq .Losr_exit

    // Is it a double?
    cmp w10, #'D'
    bne .Lno_double
    str d0, [x3]
    b .Losr_exit

.Lno_double:  // Is it a float?
    cmp w10, #'F'
    bne .Lno_float
    str s0, [x3]
    b .Losr_exit

.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x3]

.Losr_exit:  // Finish up.
    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
    mov sp, x2
    ret

.Losr_entry:
    // Update stack pointer for the callee
    sub sp, sp, x1

    // Update link register slot expected by the callee.
    sub w1, w1, #8
    str lr, [sp, x1]

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X0 - source address
    // W1 - args length
    // SP - destination address.
    // W10 - temporary
.Losr_loop_entry:
    cmp w1, #0
    beq .Losr_loop_exit
    sub w1, w1, #4
    ldr w10, [x0, x1]
    str w10, [sp, x1]
    b .Losr_loop_entry

.Losr_loop_exit:
    // Branch to the OSR entry point.
    br x2

END art_quick_osr_stub

    /*
     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
     */
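    /*
     * A sketch of the gprs_ layout implied by the loads below (8-byte slots):
     * slots 0-30 hold x0-x30, slot 31 holds SP, slot 32 is the space for the
     * unused XZR and slot 33 holds the PC to branch to.
     */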

ENTRY art_quick_do_long_jump
    // Load FPRs
    ldp d0, d1, [x1], #16
    ldp d2, d3, [x1], #16
    ldp d4, d5, [x1], #16
    ldp d6, d7, [x1], #16
    ldp d8, d9, [x1], #16
    ldp d10, d11, [x1], #16
    ldp d12, d13, [x1], #16
    ldp d14, d15, [x1], #16
    ldp d16, d17, [x1], #16
    ldp d18, d19, [x1], #16
    ldp d20, d21, [x1], #16
    ldp d22, d23, [x1], #16
    ldp d24, d25, [x1], #16
    ldp d26, d27, [x1], #16
    ldp d28, d29, [x1], #16
    ldp d30, d31, [x1]

    // Load GPRs
    // TODO: lots of those are smashed, could optimize.
    add x0, x0, #30*8
    ldp x30, x1, [x0], #-16          // LR & SP
    ldp x28, x29, [x0], #-16
    ldp x26, x27, [x0], #-16
    ldp x24, x25, [x0], #-16
    ldp x22, x23, [x0], #-16
    ldp x20, x21, [x0], #-16
    ldp x18, x19, [x0], #-16
    ldp x16, x17, [x0], #-16
    ldp x14, x15, [x0], #-16
    ldp x12, x13, [x0], #-16
    ldp x10, x11, [x0], #-16
    ldp x8, x9, [x0], #-16
    ldp x6, x7, [x0], #-16
    ldp x4, x5, [x0], #-16
    ldp x2, x3, [x0], #-16
    mov sp, x1

    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
    ldr x1, [x0, #33*8]
    // And the value of x0.
    ldr x0, [x0]

    br  x1
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
     * possibly null object to lock.
     *
     * Derived from arm32 code.
     */
    .extern artLockObjectFromCode
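
    /*
     * A rough sketch of the thin lock word layout assumed below (lock_word.h
     * is authoritative): the low 16 bits hold the owner's thread id, the bits
     * above hold the recursion count (LOCK_WORD_THIN_LOCK_COUNT_ONE is one
     * count), then the gc state / read barrier bits, and the top state bits,
     * which are all zero for a thin or unlocked word.
     */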
ENTRY art_quick_lock_object
    cbz    w0, .Lslow_lock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_lock:
    ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
    ldxr   w1, [x4]
    mov    x3, x1
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cbnz   w3, .Lnot_unlocked         // already thin locked
    // unlocked case - x1: original lock word that's zero except for the read barrier bits.
    orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    dmb    ishld                      // acquire (LoadLoad|LoadStore) memory barrier
    ret
.Lnot_unlocked:  // x1: original lock word
    lsr    w3, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w3, .Lslow_lock            // if either of the top two bits are set, go slow path
    eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w2, w2                     // zero top 16 bits
    cbnz   w2, .Lslow_lock            // thread ids don't match -> contention, go slow path;
                                      // else fall through to recursive thin lock
    mov    x3, x1                     // copy the lock word to check count overflow.
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
    add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
    cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
    add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Llock_stxr_fail:
    b      .Lretry_lock               // retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * x0 holds the possibly null object to unlock.
     *
     * Derived from arm32 code.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    x0, .Lslow_unlock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    w1, [x4]
#else
    ldxr   w1, [x4]                   // Need to use atomic instructions for read barrier
#endif
    lsr    w2, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
    ldr    w2, [xSELF, #THREAD_ID_OFFSET]
    mov    x3, x1                     // copy lock word to check thread id equality
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w3, w3                     // zero top 16 bits
    cbnz   w3, .Lslow_unlock          // if thread ids don't match, go slow path
    mov    x3, x1                     // copy lock word to detect transition to unlocked
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    // transition to unlocked
    mov    x3, x1
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved read barrier bits
    dmb    ish                        // full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
    str    w3, [x4]
#else
    stxr   w2, w3, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lrecursive_thin_unlock:  // w1: original lock word
    sub    w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
#ifndef USE_READ_BARRIER
    str    w1, [x4]
#else
    stxr   w2, w1, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lunlock_stxr_fail:
    b      .Lretry_unlock             // retry
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
     * artThrowClassCastException.
     */
    .extern artThrowClassCastException
ENTRY art_quick_check_cast
    // Store arguments and link register
    // Stack needs to be 16B aligned on calls.
    stp x0, x1, [sp,#-32]!
    .cfi_adjust_cfa_offset 32
    .cfi_rel_offset x0, 0
    .cfi_rel_offset x1, 8
    str xLR, [sp, #24]
    .cfi_rel_offset x30, 24

    // Call runtime code
    bl artIsAssignableFromCode

    // Check for exception
    cbz x0, .Lthrow_class_cast_exception

    // Restore and return
    ldr xLR, [sp, #24]
    .cfi_restore x30
    ldp x0, x1, [sp], #32
    .cfi_restore x0
    .cfi_restore x1
    .cfi_adjust_cfa_offset -32
    ret

    .cfi_adjust_cfa_offset 32         // Reset unwind info so following code unwinds.

.Lthrow_class_cast_exception:
    // Restore
    ldr xLR, [sp, #24]
    .cfi_restore x30
    ldp x0, x1, [sp], #32
    .cfi_restore x0
    .cfi_restore x1
    .cfi_adjust_cfa_offset -32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    b artThrowClassCastException      // (Class*, Class*, Thread*)
    brk 0                             // We should not return here...
END art_quick_check_cast

// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
.macro POP_REG_NE xReg, offset, xExclude
    .ifnc \xReg, \xExclude
        ldr \xReg, [sp, #\offset]     // restore xReg
        .cfi_restore \xReg
    .endif
.endm

// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
    .ifc \xReg1, \xExclude
        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
    .else
        .ifc \xReg2, \xExclude
            ldr \xReg1, [sp, #\offset]          // restore xReg1
        .else
            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
        .endif
    .endif
    .cfi_restore \xReg1
    .cfi_restore \xReg2
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * xDest, wDest and xObj are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle
     * name mismatch between instructions. This macro uses the lower 32b of register when possible.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
#ifdef USE_READ_BARRIER
#ifdef USE_BAKER_READ_BARRIER
    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
    // False dependency to avoid needing load/load fence.
    add \xObj, \xObj, \xTemp, lsr #32
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
    b .Lrb_exit\number
#endif
.Lrb_slowpath\number:
    // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
    stp x0, x1, [sp, #-48]!
    .cfi_adjust_cfa_offset 48
    .cfi_rel_offset x0, 0
    .cfi_rel_offset x1, 8
    stp x2, x3, [sp, #16]
    .cfi_rel_offset x2, 16
    .cfi_rel_offset x3, 24
    stp x4, xLR, [sp, #32]
    .cfi_rel_offset x4, 32
    .cfi_rel_offset x30, 40

    // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
    .ifnc \xObj, x1
        mov x1, \xObj               // pass xObj
    .endif
    mov w2, #\offset                // pass offset
    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \wDest, w0
        mov \wDest, w0              // save return value in wDest
    .endif

    // Conditionally restore saved registers
    POP_REG_NE x0, 0, \xDest
    POP_REG_NE x1, 8, \xDest
    POP_REG_NE x2, 16, \xDest
    POP_REG_NE x3, 24, \xDest
    POP_REG_NE x4, 32, \xDest
    ldr xLR, [sp, #40]
    .cfi_restore x30
    add sp, sp, #48
    .cfi_adjust_cfa_offset -48
.Lrb_exit\number:
#else
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
#endif  // USE_READ_BARRIER
.endm
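
    /*
     * A sketch of the Baker fast path above: the 32-bit lock word load leaves
     * the upper half of xTemp zero, so "add xObj, xObj, xTemp, lsr #32" adds
     * zero while creating an address dependency on the load, ordering the two
     * loads without a dmb. If the read barrier state bit is set the reference
     * may be stale and we fall back to artReadBarrierSlow.
     */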

    /*
     * Entry from managed code for array put operations of objects where the value being stored
     * needs to be checked for compatibility.
     * x0 = array, x1 = index, x2 = value
     *
     * Currently all values should fit into w0/w1/w2, and w1 always will as indices are 32b. We
     * assume, though, that the upper 32b are zeroed out. At least for x1/w1 we can do better by
     * using index-zero-extension in load/stores.
     *
     * Temporaries: x3, x4
     * TODO: x4 OK? ip seems wrong here.
     */
ENTRY art_quick_aput_obj_with_null_and_bound_check
    tst x0, x0
    bne art_quick_aput_obj_with_bound_check
    b art_quick_throw_null_pointer_exception
END art_quick_aput_obj_with_null_and_bound_check

ENTRY art_quick_aput_obj_with_bound_check
    ldr w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]
    cmp w3, w1
    bhi art_quick_aput_obj
    mov x0, x1
    mov x1, x3
    b art_quick_throw_array_bounds
END art_quick_aput_obj_with_bound_check

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
ENTRY art_quick_aput_obj
    cbz x2, .Ldo_aput_null
    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
                                                                    // This also zero-extends to x3
    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
    // This also zero-extends to x3
    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
                                                                    // This also zero-extends to x4
    cmp w3, w4  // value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
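    // Card mark (a hedged note; the runtime's CardTable is authoritative):
    // w3 holds the card table base, whose low byte ART arranges to equal the
    // card-dirty value, so the strb below dirties the card for the array at
    // base + (address >> 7), i.e. 128-byte cards.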
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #7
    strb w3, [x3, x0]
    ret
.Ldo_aput_null:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
    ret
.Lcheck_assignability:
    // Store arguments and link register
    stp x0, x1, [sp,#-32]!
    .cfi_adjust_cfa_offset 32
    .cfi_rel_offset x0, 0
    .cfi_rel_offset x1, 8
    stp x2, xLR, [sp, #16]
    .cfi_rel_offset x2, 16
    .cfi_rel_offset x30, 24

    // Call runtime code
    mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
    bl artIsAssignableFromCode

    // Check for exception
    cbz x0, .Lthrow_array_store_exception

    // Restore
    ldp x2, x30, [sp, #16]
    .cfi_restore x2
    .cfi_restore x30
    ldp x0, x1, [sp], #32
    .cfi_restore x0
    .cfi_restore x1
    .cfi_adjust_cfa_offset -32

    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                          // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    lsr x0, x0, #7
    strb w3, [x3, x0]
    ret
    .cfi_adjust_cfa_offset 32  // 4 restores after cbz for unwinding.
.Lthrow_array_store_exception:
    ldp x2, x30, [sp, #16]
    .cfi_restore x2
    .cfi_restore x30
    ldp x0, x1, [sp], #32
    .cfi_restore x0
    .cfi_restore x1
    .cfi_adjust_cfa_offset -32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mov x1, x2                    // Pass value.
    mov x2, xSELF                 // Pass Thread::Current.
    b artThrowArrayStoreException // (Object*, Object*, Thread*).
    brk 0                         // Unreached.
END art_quick_aput_obj
1635
1636// Macro to facilitate adding new allocation entrypoints.
1637.macro ONE_ARG_DOWNCALL name, entrypoint, return
1638    .extern \entrypoint
1639ENTRY \name
1640    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1641    mov    x1, xSELF                  // pass Thread::Current
1642    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
1643    RESTORE_SAVE_REFS_ONLY_FRAME
1644    \return
1645END \name
1646.endm
1647
1648// Macro to facilitate adding new allocation entrypoints.
1649.macro TWO_ARG_DOWNCALL name, entrypoint, return
1650    .extern \entrypoint
1651ENTRY \name
1652    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1653    mov    x2, xSELF                  // pass Thread::Current
1654    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
1655    RESTORE_SAVE_REFS_ONLY_FRAME
1656    \return
1657END \name
1658.endm
1659
1660// Macro to facilitate adding new allocation entrypoints.
1661.macro THREE_ARG_DOWNCALL name, entrypoint, return
1662    .extern \entrypoint
1663ENTRY \name
1664    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1665    mov    x3, xSELF                  // pass Thread::Current
1666    bl     \entrypoint
1667    RESTORE_SAVE_REFS_ONLY_FRAME
1668    \return
1669END \name
1670.endm
1671
1672// Macro to facilitate adding new allocation entrypoints.
1673.macro FOUR_ARG_DOWNCALL name, entrypoint, return
1674    .extern \entrypoint
1675ENTRY \name
1676    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1677    mov    x4, xSELF                  // pass Thread::Current
1678    bl     \entrypoint                //
1679    RESTORE_SAVE_REFS_ONLY_FRAME
1680    \return
1681    DELIVER_PENDING_EXCEPTION
1682END \name
1683.endm
1684
1685// Macros that exploit code similarities for downcalls with a referrer.
1686.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
1687    .extern \entrypoint
1688ENTRY \name
1689    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1690    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1691    mov    x2, xSELF                  // pass Thread::Current
1692    bl     \entrypoint                // (uint32_t field_idx, ArtMethod* referrer, Thread*)
1693    RESTORE_SAVE_REFS_ONLY_FRAME
1694    \return
1695END \name
1696.endm
1697
1698.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
1699    .extern \entrypoint
1700ENTRY \name
1701    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1702    ldr    x2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1703    mov    x3, xSELF                  // pass Thread::Current
1704    bl     \entrypoint
1705    RESTORE_SAVE_REFS_ONLY_FRAME
1706    \return
1707END \name
1708.endm
1709
1710.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
1711    .extern \entrypoint
1712ENTRY \name
1713    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1714    ldr    x3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1715    mov    x4, xSELF                  // pass Thread::Current
1716    bl     \entrypoint
1717    RESTORE_SAVE_REFS_ONLY_FRAME
1718    \return
1719END \name
1720.endm
1721
1722.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1723    cbz w0, 1f                 // result zero branch over
1724    ret                        // return
17251:
1726    DELIVER_PENDING_EXCEPTION
1727.endm
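    /*
     * All of the downcall macros above share one shape; a minimal C model,
     * assuming a hypothetical two-argument entrypoint (the names here are
     * illustrative only):
     *
     *   #include <stdint.h>
     *   extern void* artEntrypoint(uint32_t a0, uint32_t a1, void* self);  // hypothetical
     *   static void* TwoArgDowncall(uint32_t a0, uint32_t a1, void* self) {
     *     // SETUP/RESTORE_SAVE_REFS_ONLY_FRAME bracket the call in assembly.
     *     void* result = artEntrypoint(a0, a1, self);  // Thread* appended last
     *     return result;  // the \return macro rets or delivers the exception
     *   }
     */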
1728
1729    /*
1730     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
1731     * failure.
1732     */
1733TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1734
1735    /*
1736     * Entry from managed code when static storage is uninitialized; this stub will run the class
1737     * initializer and deliver an exception on error. On success the static storage base is
1738     * returned.
1739     */
1740ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1741
1742ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1743ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1744
1745ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1746ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1747ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1748ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1749ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1750ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1751ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1752
1753TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1754TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1755TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1756TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1757TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1758TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1759TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1760
1761TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1762TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1763TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1764TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1765
1766THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1767THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1768THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1769THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1770THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1771
1772// This is separated out as the argument order is different.
1773    .extern artSet64StaticFromCode
1774ENTRY art_quick_set64_static
1775    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1776    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1777                                      // x2 contains the parameter
1778    mov    x3, xSELF                  // pass Thread::Current
1779    bl     artSet64StaticFromCode
1780    RESTORE_SAVE_REFS_ONLY_FRAME
1781    RETURN_IF_W0_IS_ZERO_OR_DELIVER
1782END art_quick_set64_static
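    /*
     * The separation exists because the 64-bit value already sits in x2; the
     * referrer is loaded into x1 and Thread* goes in x3, matching a C
     * prototype of roughly this shape (sketched for illustration):
     *
     *   extern int artSet64StaticFromCode(uint32_t field_idx, void* referrer,
     *                                     uint64_t value, void* self);
     */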
1783
1784    /*
1785     * Entry from managed code to resolve a string; this stub will allocate a String and deliver an
1786     * exception on error. On success the String is returned. w0 holds the string index. The fast
1787     * path check for a hit in the strings cache has already been performed.
1788     */
1789ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1790
1791// Generate the allocation entrypoints for each allocator.
1792GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
1793// Comment out allocators that have arm64 specific asm.
1794// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
1795// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1796// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1797GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1798// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) implemented in asm
1799// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1800GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1801GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
1802GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1803GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1804GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1805GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1806
1807// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
1808ENTRY art_quick_alloc_object_rosalloc
1809    // Fast path rosalloc allocation.
1810    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
1811    // x2-x7: free.
1812    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
1813                                                              // Load the class (x2)
1814    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
1815    cbz    x2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
1816                                                              // Check class status.
1817    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
1818    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
1819    bne    .Lart_quick_alloc_object_rosalloc_slow_path
1820                                                              // Add a fake dependence from the
1821                                                              // following access flag and size
1822                                                              // loads to the status load.
1823                                                              // This is to prevent those loads
1824                                                              // from being reordered above the
1825                                                              // status load and reading wrong
1826                                                              // values (an alternative is to use
1827                                                              // a load-acquire for the status).
1828    eor    x3, x3, x3
1829    add    x2, x2, x3
1830                                                              // Check access flags has
1831                                                              // kAccClassIsFinalizable
1832    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
1833    tst    x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
1834    bne    .Lart_quick_alloc_object_rosalloc_slow_path
1835    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
1836                                                              // allocation stack has room.
1837                                                              // ldp won't work due to large offset.
1838    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
1839    cmp    x3, x4
1840    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
1841    ldr    w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x3)
1842    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
1843                                                              // local allocation
1844    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
1845                                                              // Compute the rosalloc bracket index
1846                                                              // from the size.
1847                                                              // Align up the size by the rosalloc
1848                                                              // bracket quantum size and divide
1849                                                              // by the quantum size and subtract
1850                                                              // by 1. This code is a shorter but
1851                                                              // equivalent version.
1852    sub    x3, x3, #1
1853    lsr    x3, x3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
1854                                                              // Load the rosalloc run (x4)
1855    add    x4, xSELF, x3, lsl #POINTER_SIZE_SHIFT
1856    ldr    x4, [x4, #THREAD_ROSALLOC_RUNS_OFFSET]
1857                                                              // Load the free list head (x3). This
1858                                                              // will be the return val.
1859    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1860    cbz    x3, .Lart_quick_alloc_object_rosalloc_slow_path
1861    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1862    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
1863                                                              // and update the list head with the
1864                                                              // next pointer.
1865    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1866                                                              // Store the class pointer in the
1867                                                              // header. This also overwrites the
1868                                                              // next pointer. The offsets are
1869                                                              // asserted to match.
1870#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1871#error "Class pointer needs to overwrite next pointer."
1872#endif
1873    POISON_HEAP_REF w2
1874    str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
1875                                                              // Fence. This is "ish" not "ishst" so
1876                                                              // that it also ensures ordering of
1877                                                              // the class status load with respect
1878                                                              // to later accesses to the class
1879                                                              // object. Alternatively we could use
1880                                                              // "ishst" if we use load-acquire for
1881                                                              // the class status load.
1882                                                              // Needs to be done before pushing on
1883                                                              // allocation since Heap::VisitObjects
1884                                                              // relies on seeing the class pointer.
1885                                                              // b/28790624
1886    dmb    ish
1887                                                              // Push the new object onto the thread
1888                                                              // local allocation stack and
1889                                                              // increment the thread local
1890                                                              // allocation stack top.
1891    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1892    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
1893    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1894                                                              // Decrement the size of the free list
1895    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1896    sub    x1, x1, #1
1897                                                              // TODO: consider combining this store
1898                                                              // and the list head store above using
1899                                                              // strd.
1900    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1901
1902    mov    x0, x3                                             // Set the return value and return.
1903    ret
1904.Lart_quick_alloc_object_rosalloc_slow_path:
1905    SETUP_SAVE_REFS_ONLY_FRAME             // save callee saves in case of GC
1906    mov    x2, xSELF                       // pass Thread::Current
1907    bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
1908    RESTORE_SAVE_REFS_ONLY_FRAME
1909    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1910END art_quick_alloc_object_rosalloc
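    /*
     * A C sketch of the rosalloc fast path above (Slot and FreeList are
     * illustrative stand-ins for the runtime's C++ structures):
     *
     *   #include <stddef.h>
     *   typedef struct Slot { struct Slot* next; } Slot;
     *   typedef struct { Slot* head; unsigned size; } FreeList;
     *   // (size - 1) >> shift rounds up to the bracket quantum and converts
     *   // to a zero-based bracket index in one step.
     *   static size_t BracketIndex(size_t size, unsigned quantum_shift) {
     *     return (size - 1) >> quantum_shift;
     *   }
     *   static void* PopFreeList(FreeList* fl) {
     *     Slot* slot = fl->head;
     *     if (slot == NULL) return NULL;  // assembly goes to the slow path
     *     fl->head = slot->next;          // unlink the head slot
     *     fl->size--;                     // mirrors the 32-bit size decrement
     *     return slot;                    // the slot becomes the new object
     *   }
     */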
1911
1912
1913// The common fast path code for art_quick_alloc_array_region_tlab.
1914.macro ALLOC_ARRAY_TLAB_FAST_PATH slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1915    // Check null class
1916    cbz    \wClass, \slowPathLabel
1917    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED \slowPathLabel, \xClass, \wClass, \xCount, \wCount, \xTemp0, \wTemp0, \xTemp1, \wTemp1, \xTemp2, \wTemp2
1918.endm
1919
1920// The common fast path code for art_quick_alloc_array_region_tlab.
1921.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1922    // Array classes are never finalizable or uninitialized, no need to check.
1923    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
1924    UNPOISON_HEAP_REF \wTemp0
1925    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1926    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
1927                                                              // bits.
1928                                                              // xCount holds a 32-bit value, so
1929                                                              // the shift cannot overflow.
1930    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
1931    // Add array data offset and alignment.
1932    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1933#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1934#error Long array data offset must be 4 greater than int array data offset.
1935#endif
1936
1937    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
1938                                                              // component size shift is 3
1939                                                              // (for 64 bit alignment).
1940    and    \xTemp0, \xTemp0, #4
1941    add    \xTemp1, \xTemp1, \xTemp0
1942    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED   // Round up the object size by the
1943                                                              // object alignment. (addr + 7) & ~7.
1944                                                              // The add of 7 is done above.
1945
1946    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
1947    bhs    \slowPathLabel                                     // path.
1948
1949    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
1950                                                              // we use (end - begin) to handle
1951                                                              // negative size arrays. It is
1952                                                              // assumed that a negative size will
1953                                                              // always compare greater, unsigned,
1954                                                              // than the region size.
1955    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
1956    sub    \xTemp2, \xTemp2, \xTemp0
1957    cmp    \xTemp1, \xTemp2
1958    bhi    \slowPathLabel
1959
1960    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1961                                                              // Move old thread_local_pos to x0
1962                                                              // for the return value.
1963    mov    x0, \xTemp0
1964    add    \xTemp0, \xTemp0, \xTemp1
1965    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
1966    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
1967    add    \xTemp0, \xTemp0, #1
1968    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
1969    POISON_HEAP_REF \wClass
1970    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
1971    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
1972                                                              // Fence.
1973    dmb    ishst
1974    ret
1975.endm
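    /*
     * A C sketch of the size computation above (data_offset stands in for
     * MIRROR_INT_ARRAY_DATA_OFFSET; 7 is OBJECT_ALIGNMENT_MASK):
     *
     *   #include <stdint.h>
     *   static uint64_t ArrayAllocSize(uint64_t count, unsigned comp_shift,
     *                                  uint64_t data_offset) {
     *     uint64_t size = (count << comp_shift) + data_offset + 7;
     *     // 64-bit components need the data offset 4 bytes further along;
     *     // (comp_shift + 1) & 4 is 4 exactly when comp_shift == 3.
     *     size += (comp_shift + 1) & 4;
     *     return size & ~UINT64_C(7);  // (addr + 7) & ~7 rounds up
     *   }
     */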
1976
1977// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
1978//
1979// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
1980// x3-x7: free.
1981// Need to preserve x0 and x1 to the slow path.
1982.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
1983    cbz    x2, \slowPathLabel                                 // Check null class
1984    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
1985.endm
1986
1987.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
1988    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]              // Check class status.
1989    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
1990    bne    \slowPathLabel
1991                                                              // Add a fake dependence from the
1992                                                              // following access flag and size
1993                                                              // loads to the status load.
1994                                                              // This is to prevent those loads
1995                                                              // from being reordered above the
1996                                                              // status load and reading wrong
1997                                                              // values (an alternative is to use
1998                                                              // a load-acquire for the status).
1999    eor    x3, x3, x3
2000    add    x2, x2, x3
2001    ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
2002.endm
2003
2004.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
2005                                                              // Check access flags has
2006                                                              // kAccClassIsFinalizable.
2007    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
2008    tbnz   x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT, \slowPathLabel
2009                                                              // Load thread_local_pos (x4) and
2010                                                              // thread_local_end (x5).
2011    ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
2012    ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
2013    sub    x6, x5, x4                                         // Compute the remaining buf size.
2014    ldr    w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x7).
2015    cmp    x7, x6                                             // Check if it fits. OK to do this
2016                                                              // before rounding up the object size
2017                                                              // assuming the buf size alignment.
2018    bhi    \slowPathLabel
2019    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
2020                                                              // Round up the object size by the
2021                                                              // object alignment. (addr + 7) & ~7.
2022    add    x7, x7, #OBJECT_ALIGNMENT_MASK
2023    and    x7, x7, #OBJECT_ALIGNMENT_MASK_TOGGLED
2024                                                              // Move old thread_local_pos to x0
2025                                                              // for the return value.
2026    mov    x0, x4
2027    add    x5, x0, x7
2028    str    x5, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
2029    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
2030    add    x5, x5, #1
2031    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
2032    POISON_HEAP_REF w2
2033    str    w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
2034                                                              // Fence. This is "ish" not "ishst" so
2035                                                              // that the code after this allocation
2036                                                              // site will see the right values in
2037                                                              // the fields of the class.
2038                                                              // Alternatively we could use "ishst"
2039                                                              // if we use load-acquire for the
2040                                                              // class status load.
2041    dmb    ish
2042    ret
2043.endm
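    /*
     * A C sketch of the TLAB bump allocation above (Tlab is an illustrative
     * stand-in for the THREAD_LOCAL_* fields on Thread):
     *
     *   #include <stdint.h>
     *   #include <stddef.h>
     *   typedef struct { uint8_t *pos, *end; size_t objects; } Tlab;
     *   static void* TlabAlloc(Tlab* t, size_t object_size) {
     *     // Comparing against (end - pos) makes a huge/negative size fail.
     *     if (object_size > (size_t)(t->end - t->pos)) return NULL;  // slow path
     *     object_size = (object_size + 7) & ~(size_t)7;  // 8-byte alignment
     *     uint8_t* result = t->pos;  // old pos is the new object
     *     t->pos += object_size;     // bump the pointer
     *     t->objects++;              // thread_local_objects++
     *     return result;             // caller stores the class + dmb ish
     *   }
     */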
2044
2045// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
2046ENTRY art_quick_alloc_object_tlab
2047    // Fast path tlab allocation.
2048    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
2049    // x2-x7: free.
2050#if defined(USE_READ_BARRIER)
2051    mvn    x0, xzr                                            // Read barrier not supported here.
2052    ret                                                       // Return -1.
2053#endif
2054    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
2055                                                              // Load the class (x2)
2056    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
2057    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
2058.Lart_quick_alloc_object_tlab_slow_path:
2059    SETUP_SAVE_REFS_ONLY_FRAME           // Save callee saves in case of GC.
2060    mov    x2, xSELF                     // Pass Thread::Current.
2061    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
2062    RESTORE_SAVE_REFS_ONLY_FRAME
2063    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
2064END art_quick_alloc_object_tlab
2065
2066// The common code for art_quick_alloc_object_*region_tlab
2067.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
2068ENTRY \name
2069    // Fast path region tlab allocation.
2070    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
2071    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
2072    // x2-x7: free.
2073#if !defined(USE_READ_BARRIER)
2074    mvn    x0, xzr                                            // Read barrier must be enabled here.
2075    ret                                                       // Return -1.
2076#endif
2077.if \is_resolved
2078    mov    x2, x0 // class is actually stored in x0 already
2079.else
2080    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
2081                                                              // Load the class (x2)
2082    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
2083.endif
2084    // Most common case: GC is not marking.
2085    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
2086    cbnz   x3, .Lmarking\name
2087.Ldo_allocation\name:
2088    \fast_path .Lslow_path\name
2089.Lmarking\name:
2090    // GC is marking, check the lock word of the class for the mark bit.
2091    // If the class is null, go slow path. The check is required to read the lock word.
2092    cbz    w2, .Lslow_path\name
2093    // Class is not null, check mark bit in lock word.
2094    ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2095    // If the bit is not zero, do the allocation.
2096    tbnz    w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
2097                                                              // The read barrier slow path. Mark
2098                                                              // the class.
2099    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
2100    str    xLR, [sp, #16]                                     // Align sp by 16 bytes.
2101    mov    x0, x2                                             // Pass the class as the first param.
2102    bl     artReadBarrierMark
2103    mov    x2, x0                                             // Get the (marked) class back.
2104    ldp    x0, x1, [sp, #0]                                   // Restore registers.
2105    ldr    xLR, [sp, #16]
2106    add    sp, sp, #32
2107    b      .Ldo_allocation\name
2108.Lslow_path\name:
2109    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
2110    mov    x2, xSELF                           // Pass Thread::Current.
2111    bl     \entrypoint                         // (uint32_t type_idx, Method* method, Thread*)
2112    RESTORE_SAVE_REFS_ONLY_FRAME
2113    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
2114END \name
2115.endm
2116
2117GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
2118GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
2119GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
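    /*
     * A C sketch of the marking gate shared by the region TLAB entrypoints
     * above (and the array variants below); the lock word field and mark bit
     * shift are illustrative simplifications:
     *
     *   #include <stdint.h>
     *   typedef struct { uint32_t lock_word; } Class;  // illustrative layout
     *   extern Class* artReadBarrierMark(Class* klass);
     *   // Returns the class to allocate with, or NULL to force the slow path.
     *   static Class* MarkClassIfNeeded(Class* klass, int gc_is_marking,
     *                                   unsigned mark_bit_shift) {
     *     if (!gc_is_marking) return klass;  // most common case
     *     if (klass == NULL) return NULL;    // cannot read the lock word
     *     if ((klass->lock_word >> mark_bit_shift) & 1) return klass;
     *     return artReadBarrierMark(klass);  // read barrier slow path
     *   }
     */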
2120
2121// The common code for art_quick_alloc_array_*region_tlab
2122.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
2123ENTRY \name
2124    // Fast path array allocation for region tlab allocation.
2125    // x0: uint32_t type_idx
2126    // x1: int32_t component_count
2127    // x2: ArtMethod* method
2128    // x3-x7: free.
2129#if !defined(USE_READ_BARRIER)
2130    mvn    x0, xzr                                            // Read barrier must be enabled here.
2131    ret                                                       // Return -1.
2132#endif
2133.if \is_resolved
2134    mov    x3, x0
2135    // If already resolved, class is stored in x0
2136.else
2137    ldr    x3, [x2, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
2138                                                              // Load the class (x3)
2139    ldr    w3, [x3, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
2140.endif
2141    // Most common case: GC is not marking.
2142    ldr    w4, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
2143    cbnz   x4, .Lmarking\name
2144.Ldo_allocation\name:
2145    \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
2146.Lmarking\name:
2147    // GC is marking, check the lock word of the class for the mark bit.
2148    // If the class is null, go slow path. The check is required to read the lock word.
2149    cbz    w3, .Lslow_path\name
2150    // Class is not null, check mark bit in lock word.
2151    ldr    w4, [x3, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2152    // If the bit is not zero, do the allocation.
2153    tbnz   w4, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
2154                                                              // The read barrier slow path. Mark
2155                                                              // the class.
2156    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, x2, lr).
2157    stp    x2, xLR, [sp, #16]
2158    mov    x0, x3                                             // Pass the class as the first param.
2159    bl     artReadBarrierMark
2160    mov    x3, x0                                             // Get the (marked) class back.
2161    ldp    x2, xLR, [sp, #16]
2162    ldp    x0, x1, [sp], #32                                  // Restore registers.
2163    b      .Ldo_allocation\name
2164.Lslow_path\name:
2165    // x0: uint32_t type_idx / mirror::Class* klass (if resolved)
2166    // x1: int32_t component_count
2167    // x2: ArtMethod* method
2168    // x3: Thread* self
2169    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
2170    mov    x3, xSELF                  // pass Thread::Current
2171    bl     \entrypoint
2172    RESTORE_SAVE_REFS_ONLY_FRAME
2173    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
2174END \name
2175.endm
2176
2177GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_region_tlab, artAllocArrayFromCodeRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH, 0
2178// TODO: art_quick_alloc_array_resolved_region_tlab seems to not get called. Investigate compiler.
2179GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, 1
2180
2181    /*
2182     * Called by managed code when the thread has been asked to suspend.
2183     */
2184    .extern artTestSuspendFromCode
2185ENTRY art_quick_test_suspend
2186    SETUP_SAVE_EVERYTHING_FRAME               // save callee saves for stack crawl
2187    mov    x0, xSELF
2188    bl     artTestSuspendFromCode             // (Thread*)
2189    RESTORE_SAVE_EVERYTHING_FRAME
2190    ret
2191END art_quick_test_suspend
2192
2193ENTRY art_quick_implicit_suspend
2194    mov    x0, xSELF
2195    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
2196    bl     artTestSuspendFromCode             // (Thread*)
2197    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
2198END art_quick_implicit_suspend
2199
2200    /*
2201     * Called by managed code that is attempting to call a method on a proxy class. On entry
2202     * x0 holds the proxy method and x1 holds the receiver; the frame size of the invoked proxy
2203     * method agrees with a refs-and-args callee save frame.
2204     */
2205    .extern artQuickProxyInvokeHandler
2206ENTRY art_quick_proxy_invoke_handler
2207    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
2208    mov     x2, xSELF                   // pass Thread::Current
2209    mov     x3, sp                      // pass SP
2210    bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
2211    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
2212    cbnz    x2, .Lexception_in_proxy    // branch if an exception is pending
2213    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
2214    fmov    d0, x0                      // Store result in d0 in case it was float or double
2215    ret                                 // return on success
2216.Lexception_in_proxy:
2217    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2218    DELIVER_PENDING_EXCEPTION
2219END art_quick_proxy_invoke_handler
2220
2221    /*
2222     * Called to resolve an imt conflict.
2223     * x0 is the conflict ArtMethod.
2224     * xIP1 is a hidden argument that holds the target interface method's dex method index.
2225     *
2226     * Note that this stub writes to xIP0, xIP1, and x0.
2227     */
2228    .extern artInvokeInterfaceTrampoline
2229ENTRY art_quick_imt_conflict_trampoline
2230    ldr xIP0, [sp, #0]  // Load referrer
2231    ldr xIP0, [xIP0, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_64]   // Load dex cache methods array
2232    ldr xIP0, [xIP0, xIP1, lsl #POINTER_SIZE_SHIFT]  // Load interface method
2233    ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
2234    ldr x0, [xIP1]  // Load first entry in ImtConflictTable.
2235.Limt_table_iterate:
2236    cmp x0, xIP0
2237    // Branch if found. Benchmarks have shown doing a branch here is better.
2238    beq .Limt_table_found
2239    // If the entry is null, the interface method is not in the ImtConflictTable.
2240    cbz x0, .Lconflict_trampoline
2241    // Iterate over the entries of the ImtConflictTable.
2242    ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
2243    b .Limt_table_iterate
2244.Limt_table_found:
2245    // We successfully hit an entry in the table. Load the target method
2246    // and jump to it.
2247    ldr x0, [xIP1, #__SIZEOF_POINTER__]
2248    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
2249    br xIP0
2250.Lconflict_trampoline:
2251    // Call the runtime stub to populate the ImtConflictTable and jump to the
2252    // resolved method.
2253    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
2254END art_quick_imt_conflict_trampoline
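    /*
     * The table walk above in C: the ImtConflictTable is a null-terminated
     * array of (interface method, implementation) pointer pairs; ImtEntry is
     * an illustrative view of that layout:
     *
     *   #include <stddef.h>
     *   typedef struct { void* interface_method; void* implementation; } ImtEntry;
     *   static void* LookupImt(ImtEntry* table, void* interface_method) {
     *     for (ImtEntry* e = table; e->interface_method != NULL; ++e) {
     *       if (e->interface_method == interface_method) {
     *         return e->implementation;  // .Limt_table_found
     *       }
     *     }
     *     return NULL;  // .Lconflict_trampoline resolves and fills the table
     *   }
     */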
2255
2256ENTRY art_quick_resolution_trampoline
2257    SETUP_SAVE_REFS_AND_ARGS_FRAME
2258    mov x2, xSELF
2259    mov x3, sp
2260    bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
2261    cbz x0, 1f
2262    mov xIP0, x0            // Remember returned code pointer in xIP0.
2263    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
2264    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2265    br xIP0
22661:
2267    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2268    DELIVER_PENDING_EXCEPTION
2269END art_quick_resolution_trampoline
2270
2271/*
2272 * Generic JNI frame layout:
2273 *
2274 * #-------------------#
2275 * |                   |
2276 * | caller method...  |
2277 * #-------------------#    <--- SP on entry
2278 * | Return X30/LR     |
2279 * | X29/FP            |    callee save
2280 * | X28               |    callee save
2281 * | X27               |    callee save
2282 * | X26               |    callee save
2283 * | X25               |    callee save
2284 * | X24               |    callee save
2285 * | X23               |    callee save
2286 * | X22               |    callee save
2287 * | X21               |    callee save
2288 * | X20               |    callee save
2289 * | X19               |    callee save
2290 * | X7                |    arg7
2291 * | X6                |    arg6
2292 * | X5                |    arg5
2293 * | X4                |    arg4
2294 * | X3                |    arg3
2295 * | X2                |    arg2
2296 * | X1                |    arg1
2297 * | D7                |    float arg 8
2298 * | D6                |    float arg 7
2299 * | D5                |    float arg 6
2300 * | D4                |    float arg 5
2301 * | D3                |    float arg 4
2302 * | D2                |    float arg 3
2303 * | D1                |    float arg 2
2304 * | D0                |    float arg 1
2305 * | Method*           | <- X0
2306 * #-------------------#
2307 * | local ref cookie  | // 4B
2308 * | handle scope size | // 4B
2309 * #-------------------#
2310 * | JNI Call Stack    |
2311 * #-------------------#    <--- SP on native call
2312 * |                   |
2313 * | Stack for Regs    |    The trampoline assembly will pop these values
2314 * |                   |    into registers for native call
2315 * #-------------------#
2316 * | Native code ptr   |
2317 * #-------------------#
2318 * | Free scratch      |
2319 * #-------------------#
2320 * | Ptr to (1)        |    <--- SP
2321 * #-------------------#
2322 */
2323    /*
2324     * Called to do a generic JNI down-call
2325     */
2326ENTRY art_quick_generic_jni_trampoline
2327    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
2328
2329    // Save SP, so we can have static CFI info.
2330    mov x28, sp
2331    .cfi_def_cfa_register x28
2332
2333    // This looks the same, but is different: this will be updated to point to the bottom
2334    // of the frame when the handle scope is inserted.
2335    mov xFP, sp
2336
2337    mov xIP0, #5120
2338    sub sp, sp, xIP0
2339
2340    // prepare for artQuickGenericJniTrampoline call
2341    // (Thread*,  SP)
2342    //    x0      x1   <= C calling convention
2343    //   xSELF    xFP  <= where they are
2344
2345    mov x0, xSELF   // Thread*
2346    mov x1, xFP
2347    bl artQuickGenericJniTrampoline  // (Thread*, sp)
2348
2349    // The C call will have registered the complete save-frame on success.
2350    // The result of the call is:
2351    // x0: pointer to native code, 0 on error.
2352    // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
2353
2354    // Check for error = 0.
2355    cbz x0, .Lexception_in_native
2356
2357    // Release part of the alloca.
2358    mov sp, x1
2359
2360    // Save the code pointer
2361    mov xIP0, x0
2362
2363    // Load parameters from frame into registers.
2364    // TODO: Check with artQuickGenericJniTrampoline.
2365    //      Also, check again AAPCS64 - the stack arguments are interleaved.
2366    ldp x0, x1, [sp]
2367    ldp x2, x3, [sp, #16]
2368    ldp x4, x5, [sp, #32]
2369    ldp x6, x7, [sp, #48]
2370
2371    ldp d0, d1, [sp, #64]
2372    ldp d2, d3, [sp, #80]
2373    ldp d4, d5, [sp, #96]
2374    ldp d6, d7, [sp, #112]
2375
2376    add sp, sp, #128
2377
2378    blr xIP0        // native call.
2379
2380    // result sign extension is handled in C code
2381    // prepare for artQuickGenericJniEndTrampoline call
2382    // (Thread*, result, result_f)
2383    //    x0       x1       x2        <= C calling convention
2384    mov x1, x0      // Result (from saved).
2385    mov x0, xSELF   // Thread register.
2386    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
2387
2388    bl artQuickGenericJniEndTrampoline
2389
2390    // Pending exceptions possible.
2391    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
2392    cbnz x2, .Lexception_in_native
2393
2394    // Tear down the alloca.
2395    mov sp, x28
2396    .cfi_def_cfa_register sp
2397
2398    // Tear down the callee-save frame.
2399    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2400
2401    // store into fpr, for when it's a fpr return...
2402    fmov d0, x0
2403    ret
2404
2405.Lexception_in_native:
2406    // Move to x1 then sp to please assembler.
2407    ldr x1, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
2408    mov sp, x1
2409    .cfi_def_cfa_register sp
2410    // This will create a new save-all frame, required by the runtime.
2411    DELIVER_PENDING_EXCEPTION
2412END art_quick_generic_jni_trampoline
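    /*
     * Sketched prototypes for the two runtime calls above (TwoWords models
     * the x0/x1 return pair; the exact C++ signatures live in the runtime):
     *
     *   #include <stdint.h>
     *   typedef struct { void* code; void* sp; } TwoWords;
     *   // Returns the native code pointer (NULL on error) and the new SP.
     *   extern TwoWords artQuickGenericJniTrampoline(void* self, void* managed_sp);
     *   // Converts the raw native result and returns the value to hand back
     *   // to managed code.
     *   extern uint64_t artQuickGenericJniEndTrampoline(void* self,
     *                                                   uint64_t result,
     *                                                   uint64_t result_f);
     */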
2413
2414/*
2415 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
2416 * of a quick call:
2417 * x0 = method being called/to bridge to.
2418 * x1..x7, d0..d7 = arguments to that method.
2419 */
2420ENTRY art_quick_to_interpreter_bridge
2421    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
2422
2423    // x0 will contain mirror::ArtMethod* method.
2424    mov x1, xSELF                          // pass Thread::Current
2425    mov x2, sp
2426
2427    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
2428    //                                      mirror::ArtMethod** sp)
2429    bl   artQuickToInterpreterBridge
2430
2431    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
2432
2433    fmov d0, x0
2434
2435    RETURN_OR_DELIVER_PENDING_EXCEPTION
2436END art_quick_to_interpreter_bridge
2437
2438
2439//
2440// Instrumentation-related stubs
2441//
2442    .extern artInstrumentationMethodEntryFromCode
2443ENTRY art_quick_instrumentation_entry
2444    SETUP_SAVE_REFS_AND_ARGS_FRAME
2445
2446    mov   x20, x0             // Preserve method reference in a callee-save.
2447
2448    mov   x2, xSELF
2449    mov   x3, xLR
2450    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, LR)
2451
2452    mov   xIP0, x0            // x0 = result of call.
2453    mov   x0, x20             // Reload method reference.
2454
2455    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
2456    adr   xLR, art_quick_instrumentation_exit
2457    br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
2458END art_quick_instrumentation_entry
2459
2460    .extern artInstrumentationMethodExitFromCode
2461ENTRY art_quick_instrumentation_exit
2462    mov   xLR, #0             // Clobber LR for later checks.
2463
2464    SETUP_SAVE_REFS_ONLY_FRAME
2465
2466    // We need to save x0 and d0. We could use a callee-save from SETUP_SAVE_REFS_ONLY_FRAME, but then
2467    // we would need to fully restore it. As there are a lot of callee-save registers, it seems
2468    // easier to have an extra small stack area.
2469
2470    str x0, [sp, #-16]!       // Save integer result.
2471    .cfi_adjust_cfa_offset 16
2472    str d0,  [sp, #8]         // Save floating-point result.
2473
2474    add   x1, sp, #16         // Pass SP.
2475    mov   x2, x0              // Pass integer result.
2476    fmov  x3, d0              // Pass floating-point result.
2477    mov   x0, xSELF           // Pass Thread.
2478    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
2479
2480    mov   xIP0, x0            // Return address from instrumentation call.
2481    mov   xLR, x1             // x1 holds the link register if we're to bounce to deoptimize
2482
2483    ldr   d0, [sp, #8]        // Restore floating-point result.
2484    ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
2485    .cfi_adjust_cfa_offset -16
2486
2487    POP_SAVE_REFS_ONLY_FRAME
2488
2489    br    xIP0                // Tail-call out.
2490END art_quick_instrumentation_exit
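    /*
     * The exit hook returns two words; a sketched C view of the protocol
     * (ExitReturn is illustrative; the runtime hands them back in x0/x1):
     *
     *   #include <stdint.h>
     *   typedef struct { void* resume_pc; void* new_lr; } ExitReturn;
     *   // resume_pc goes to xIP0 (the branch target); new_lr is reloaded
     *   // into LR and points at the deoptimization entry when a deopt is
     *   // requested.
     *   extern ExitReturn artInstrumentationMethodExitFromCode(void* self, void* sp,
     *                                                          uint64_t gpr_res,
     *                                                          uint64_t fpr_res);
     */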
2491
2492    /*
2493     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
2494     * will long jump to the upcall with a special exception of -1.
2495     */
2496    .extern artDeoptimize
2497ENTRY art_quick_deoptimize
2498    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
2499    mov    x0, xSELF          // Pass thread.
2500    bl     artDeoptimize      // artDeoptimize(Thread*)
2501    brk 0
2502END art_quick_deoptimize
2503
2504    /*
2505     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
2506     * will long jump to the upcall with a special exception of -1.
2507     */
2508    .extern artDeoptimizeFromCompiledCode
2509ENTRY art_quick_deoptimize_from_compiled_code
2510    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
2511    mov    x0, xSELF                      // Pass thread.
2512    bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
2513    brk 0
2514END art_quick_deoptimize_from_compiled_code
2515
2516
2517    /*
2518     * String's indexOf.
2519     *
2520     * TODO: Not very optimized.
2521     * On entry:
2522     *    x0:   string object (known non-null)
2523     *    w1:   char to match (known <= 0xFFFF)
2524     *    w2:   Starting offset in string data
2525     */
2526ENTRY art_quick_indexof
2527    ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
2528    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
2529
2530    /* Clamp start to [0..count] */
2531    cmp   w2, #0
2532    csel  w2, wzr, w2, lt
2533    cmp   w2, w3
2534    csel  w2, w3, w2, gt
2535
2536    /* Save a copy to compute result */
2537    mov   x5, x0
2538
2539    /* Build pointer to start of data to compare and pre-bias */
2540    add   x0, x0, x2, lsl #1
2541    sub   x0, x0, #2
2542
2543    /* Compute iteration count */
2544    sub   w2, w3, w2
2545
2546    /*
2547     * At this point we have:
2548     *  x0: start of the data to test
2549     *  w1: char to compare
2550     *  w2: iteration count
2551     *  x5: original start of string data
2552     */
2553
2554    subs  w2, w2, #4
2555    b.lt  .Lindexof_remainder
2556
2557.Lindexof_loop4:
2558    ldrh  w6, [x0, #2]!
2559    ldrh  w7, [x0, #2]!
2560    ldrh  wIP0, [x0, #2]!
2561    ldrh  wIP1, [x0, #2]!
2562    cmp   w6, w1
2563    b.eq  .Lmatch_0
2564    cmp   w7, w1
2565    b.eq  .Lmatch_1
2566    cmp   wIP0, w1
2567    b.eq  .Lmatch_2
2568    cmp   wIP1, w1
2569    b.eq  .Lmatch_3
2570    subs  w2, w2, #4
2571    b.ge  .Lindexof_loop4
2572
2573.Lindexof_remainder:
2574    adds  w2, w2, #4
2575    b.eq  .Lindexof_nomatch
2576
2577.Lindexof_loop1:
2578    ldrh  w6, [x0, #2]!
2579    cmp   w6, w1
2580    b.eq  .Lmatch_3
2581    subs  w2, w2, #1
2582    b.ne  .Lindexof_loop1
2583
2584.Lindexof_nomatch:
2585    mov   x0, #-1
2586    ret
2587
2588.Lmatch_0:
2589    sub   x0, x0, #6
2590    sub   x0, x0, x5
2591    asr   x0, x0, #1
2592    ret
2593.Lmatch_1:
2594    sub   x0, x0, #4
2595    sub   x0, x0, x5
2596    asr   x0, x0, #1
2597    ret
2598.Lmatch_2:
2599    sub   x0, x0, #2
2600    sub   x0, x0, x5
2601    asr   x0, x0, #1
2602    ret
2603.Lmatch_3:
2604    sub   x0, x0, x5
2605    asr   x0, x0, #1
2606    ret
2607END art_quick_indexof
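    /*
     * The stub above is equivalent to this C routine (4x unrolling aside):
     *
     *   #include <stdint.h>
     *   static int32_t IndexOf(const uint16_t* chars, int32_t count,
     *                          uint16_t ch, int32_t start) {
     *     if (start < 0) start = 0;        // clamp start to [0..count]
     *     if (start > count) start = count;
     *     for (int32_t i = start; i < count; ++i) {
     *       if (chars[i] == ch) return i;
     *     }
     *     return -1;                       // .Lindexof_nomatch
     *   }
     */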
2608
2609    /*
2610     * Create a function `name` calling the ReadBarrier::Mark routine,
2611     * getting its argument and returning its result through W register
2612     * `wreg` (corresponding to X register `xreg`), saving and restoring
2613     * all caller-save registers.
2614     *
2615     * If `wreg` is different from `w0`, the generated function follows a
2616     * non-standard runtime calling convention:
2617     * - register `wreg` is used to pass the (sole) argument of this
2618     *   function (instead of W0);
2619     * - register `wreg` is used to return the result of this function
2620     *   (instead of W0);
2621     * - W0 is treated like a normal (non-argument) caller-save register;
2622     * - everything else is the same as in the standard runtime calling
2623     *   convention (e.g. standard callee-save registers are preserved).
2624     */
2625.macro READ_BARRIER_MARK_REG name, wreg, xreg
2626ENTRY \name
2627    // Reference is null, no work to do at all.
2628    cbz \wreg, .Lret_rb_\name
2629    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
2630    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2631    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
2632    ret
2633.Lslow_path_rb_\name:
2634    /*
2635     * Allocate 46 stack slots * 8 = 368 bytes:
2636     * - 20 slots for core registers X0-X19
2637     * - 24 slots for floating-point registers D0-D7 and D16-D31
2638     * -  1 slot for return address register XLR
2639     * -  1 padding slot for 16-byte stack alignment
2640     */
2641    // Save all potentially live caller-save core registers.
2642    stp   x0, x1,   [sp, #-368]!
2643    .cfi_adjust_cfa_offset 368
2644    .cfi_rel_offset x0, 0
2645    .cfi_rel_offset x1, 8
2646    stp   x2, x3,   [sp, #16]
2647    .cfi_rel_offset x2, 16
2648    .cfi_rel_offset x3, 24
2649    stp   x4, x5,   [sp, #32]
2650    .cfi_rel_offset x4, 32
2651    .cfi_rel_offset x5, 40
2652    stp   x6, x7,   [sp, #48]
2653    .cfi_rel_offset x6, 48
2654    .cfi_rel_offset x7, 56
2655    stp   x8, x9,   [sp, #64]
2656    .cfi_rel_offset x8, 64
2657    .cfi_rel_offset x9, 72
2658    stp   x10, x11, [sp, #80]
2659    .cfi_rel_offset x10, 80
2660    .cfi_rel_offset x11, 88
2661    stp   x12, x13, [sp, #96]
2662    .cfi_rel_offset x12, 96
2663    .cfi_rel_offset x13, 104
2664    stp   x14, x15, [sp, #112]
2665    .cfi_rel_offset x14, 112
2666    .cfi_rel_offset x15, 120
2667    stp   x16, x17, [sp, #128]
2668    .cfi_rel_offset x16, 128
2669    .cfi_rel_offset x17, 136
2670    stp   x18, x19, [sp, #144]
2671    .cfi_rel_offset x18, 144
2672    .cfi_rel_offset x19, 152
2673    // Save all potentially live caller-save floating-point registers.
2674    stp   d0, d1,   [sp, #160]
2675    stp   d2, d3,   [sp, #176]
2676    stp   d4, d5,   [sp, #192]
2677    stp   d6, d7,   [sp, #208]
2678    stp   d16, d17, [sp, #224]
2679    stp   d18, d19, [sp, #240]
2680    stp   d20, d21, [sp, #256]
2681    stp   d22, d23, [sp, #272]
2682    stp   d24, d25, [sp, #288]
2683    stp   d26, d27, [sp, #304]
2684    stp   d28, d29, [sp, #320]
2685    stp   d30, d31, [sp, #336]
2686    // Save return address.
2687    str   xLR,      [sp, #352]
2688    .cfi_rel_offset x30, 352
2689    // (sp + #360 is a padding slot)
2690
2691    .ifnc \wreg, w0
2692      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
2693    .endif
2694    bl    artReadBarrierMark            // artReadBarrierMark(obj)
2695    .ifnc \wreg, w0
2696      mov   \wreg, w0                   // Return result into `wreg`
2697    .endif
2698
2699    // Restore core regs, except `xreg`, as `wreg` is used to return the
2700    // result of this function (simply remove it from the stack instead).
2701    POP_REGS_NE x0, x1,   0,   \xreg
2702    POP_REGS_NE x2, x3,   16,  \xreg
2703    POP_REGS_NE x4, x5,   32,  \xreg
2704    POP_REGS_NE x6, x7,   48,  \xreg
2705    POP_REGS_NE x8, x9,   64,  \xreg
2706    POP_REGS_NE x10, x11, 80,  \xreg
2707    POP_REGS_NE x12, x13, 96,  \xreg
2708    POP_REGS_NE x14, x15, 112, \xreg
2709    POP_REGS_NE x16, x17, 128, \xreg
2710    POP_REGS_NE x18, x19, 144, \xreg
2711    // Restore floating-point registers.
2712    ldp   d0, d1,   [sp, #160]
2713    ldp   d2, d3,   [sp, #176]
2714    ldp   d4, d5,   [sp, #192]
2715    ldp   d6, d7,   [sp, #208]
2716    ldp   d16, d17, [sp, #224]
2717    ldp   d18, d19, [sp, #240]
2718    ldp   d20, d21, [sp, #256]
2719    ldp   d22, d23, [sp, #272]
2720    ldp   d24, d25, [sp, #288]
2721    ldp   d26, d27, [sp, #304]
2722    ldp   d28, d29, [sp, #320]
2723    ldp   d30, d31, [sp, #336]
2724    // Restore return address and remove padding.
2725    ldr   xLR,      [sp, #352]
2726    .cfi_restore x30
2727    add sp, sp, #368
2728    .cfi_adjust_cfa_offset -368
2729.Lret_rb_\name:
2730    ret
2731END \name
2732.endm
2733
2734READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
2735READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
2736READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
2737READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
2738READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
2739READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
2740READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
2741READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
2742READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
2743READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
2744READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
2745READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
2746READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
2747READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
2748READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
2749READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
2750READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16
2751READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
2752READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
2753READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
2754READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
2755READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
2756READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
2757READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
2758READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
2759READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
2760READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
2761READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
2762READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
2763READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
2764