quick_entrypoints_arm64.S revision 5f404331c0ae5217d35ee8a7be77cde5c54a49de
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm64.S"

#include "arch/quick_alloc_entrypoints.S"


    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]

    sub sp, sp, #176
    .cfi_adjust_cfa_offset 176

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP callee-saves.
    stp d8, d9,   [sp, #16]
    stp d10, d11, [sp, #32]
    stp d12, d13, [sp, #48]
    stp d14, d15, [sp, #64]

    // GP callee-saves.
    stp x19, x20, [sp, #80]
    .cfi_rel_offset x19, 80
    .cfi_rel_offset x20, 88

    stp x21, x22, [sp, #96]
    .cfi_rel_offset x21, 96
    .cfi_rel_offset x22, 104

    stp x23, x24, [sp, #112]
    .cfi_rel_offset x23, 112
    .cfi_rel_offset x24, 120

    stp x25, x26, [sp, #128]
    .cfi_rel_offset x25, 128
    .cfi_rel_offset x26, 136

    stp x27, x28, [sp, #144]
    .cfi_rel_offset x27, 144
    .cfi_rel_offset x28, 152

    stp x29, xLR, [sp, #160]
    .cfi_rel_offset x29, 160
    .cfi_rel_offset x30, 168

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm
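
    /*
     * Resulting frame layout, read off the stores above (offsets relative to the new SP):
     *   [sp, #0]          ArtMethod* for the kSaveAllCalleeSaves runtime method
     *   [sp, #8]          stack alignment filler
     *   [sp, #16-#72]     d8 - d15  (FP callee-saves)
     *   [sp, #80-#152]    x19 - x28 (GP callee-saves)
     *   [sp, #160-#168]   x29 (FP) and x30 (LR)
     */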

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]

    sub sp, sp, #96
    .cfi_adjust_cfa_offset 96

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
#endif

    // GP callee-saves.
    // x20 paired with ArtMethod* - see below.
    stp x21, x22, [sp, #16]
    .cfi_rel_offset x21, 16
    .cfi_rel_offset x22, 24

    stp x23, x24, [sp, #32]
    .cfi_rel_offset x23, 32
    .cfi_rel_offset x24, 40

    stp x25, x26, [sp, #48]
    .cfi_rel_offset x25, 48
    .cfi_rel_offset x26, 56

    stp x27, x28, [sp, #64]
    .cfi_rel_offset x27, 64
    .cfi_rel_offset x28, 72

    stp x29, xLR, [sp, #80]
    .cfi_rel_offset x29, 80
    .cfi_rel_offset x30, 88

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
    stp xIP0, x20, [sp]
    .cfi_rel_offset x20, 8

    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_ONLY_FRAME
    // Callee-saves.
    ldr x20, [sp, #8]
    .cfi_restore x20

    ldp x21, x22, [sp, #16]
    .cfi_restore x21
    .cfi_restore x22

    ldp x23, x24, [sp, #32]
    .cfi_restore x23
    .cfi_restore x24

    ldp x25, x26, [sp, #48]
    .cfi_restore x25
    .cfi_restore x26

    ldp x27, x28, [sp, #64]
    .cfi_restore x27
    .cfi_restore x28

    ldp x29, xLR, [sp, #80]
    .cfi_restore x29
    .cfi_restore x30

    add sp, sp, #96
    .cfi_adjust_cfa_offset -96
.endm

.macro POP_SAVE_REFS_ONLY_FRAME
    add sp, sp, #96
    .cfi_adjust_cfa_offset -96
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
    RESTORE_SAVE_REFS_ONLY_FRAME
    ret
.endm


.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    sub sp, sp, #224
    .cfi_adjust_cfa_offset 224

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP args.
    stp d0, d1, [sp, #16]
    stp d2, d3, [sp, #32]
    stp d4, d5, [sp, #48]
    stp d6, d7, [sp, #64]

    // Core args.
    stp x1, x2, [sp, #80]
    .cfi_rel_offset x1, 80
    .cfi_rel_offset x2, 88

    stp x3, x4, [sp, #96]
    .cfi_rel_offset x3, 96
    .cfi_rel_offset x4, 104

    stp x5, x6, [sp, #112]
    .cfi_rel_offset x5, 112
    .cfi_rel_offset x6, 120

    // x7, Callee-saves.
    stp x7, x20, [sp, #128]
    .cfi_rel_offset x7, 128
    .cfi_rel_offset x20, 136

    stp x21, x22, [sp, #144]
    .cfi_rel_offset x21, 144
    .cfi_rel_offset x22, 152

    stp x23, x24, [sp, #160]
    .cfi_rel_offset x23, 160
    .cfi_rel_offset x24, 168

    stp x25, x26, [sp, #176]
    .cfi_rel_offset x25, 176
    .cfi_rel_offset x26, 184

    stp x27, x28, [sp, #192]
    .cfi_rel_offset x27, 192
    .cfi_rel_offset x28, 200

    // x29(callee-save) and LR.
    stp x29, xLR, [sp, #208]
    .cfi_rel_offset x29, 208
    .cfi_rel_offset x30, 216

.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     *
     * TODO This is probably too conservative - saving FP & LR.
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]

    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL

    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    // FP args.
    ldp d0, d1, [sp, #16]
    ldp d2, d3, [sp, #32]
    ldp d4, d5, [sp, #48]
    ldp d6, d7, [sp, #64]

    // Core args.
    ldp x1, x2, [sp, #80]
    .cfi_restore x1
    .cfi_restore x2

    ldp x3, x4, [sp, #96]
    .cfi_restore x3
    .cfi_restore x4

    ldp x5, x6, [sp, #112]
    .cfi_restore x5
    .cfi_restore x6

    // x7, Callee-saves.
    ldp x7, x20, [sp, #128]
    .cfi_restore x7
    .cfi_restore x20

    ldp x21, x22, [sp, #144]
    .cfi_restore x21
    .cfi_restore x22

    ldp x23, x24, [sp, #160]
    .cfi_restore x23
    .cfi_restore x24

    ldp x25, x26, [sp, #176]
    .cfi_restore x25
    .cfi_restore x26

    ldp x27, x28, [sp, #192]
    .cfi_restore x27
    .cfi_restore x28

    // x29(callee-save) and LR.
    ldp x29, xLR, [sp, #208]
    .cfi_restore x29
    .cfi_restore x30

    add sp, sp, #224
    .cfi_adjust_cfa_offset -224
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
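    /*
     * Frame layout sketch, read off the stores below (offsets relative to the new SP):
     * [sp, #0] ArtMethod*, [sp, #8]-[sp, #256] d0 - d31, [sp, #264]-[sp, #504] x0 - x30 (LR).
     */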
.macro SETUP_SAVE_EVERYTHING_FRAME
    sub sp, sp, #512
    .cfi_adjust_cfa_offset 512

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif

    // Save FP registers.
    // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned.
    str d0,       [sp, #8]
    stp d1, d2,   [sp, #16]
    stp d3, d4,   [sp, #32]
    stp d5, d6,   [sp, #48]
    stp d7, d8,   [sp, #64]
    stp d9, d10,  [sp, #80]
    stp d11, d12, [sp, #96]
    stp d13, d14, [sp, #112]
    stp d15, d16, [sp, #128]
    stp d17, d18, [sp, #144]
    stp d19, d20, [sp, #160]
    stp d21, d22, [sp, #176]
    stp d23, d24, [sp, #192]
    stp d25, d26, [sp, #208]
    stp d27, d28, [sp, #224]
    stp d29, d30, [sp, #240]
    str d31,      [sp, #256]

    // Save core registers.
    str x0,       [sp, #264]
    .cfi_rel_offset x0, 264

    stp x1, x2,   [sp, #272]
    .cfi_rel_offset x1, 272
    .cfi_rel_offset x2, 280

    stp x3, x4,   [sp, #288]
    .cfi_rel_offset x3, 288
    .cfi_rel_offset x4, 296

    stp x5, x6,   [sp, #304]
    .cfi_rel_offset x5, 304
    .cfi_rel_offset x6, 312

    stp x7, x8,   [sp, #320]
    .cfi_rel_offset x7, 320
    .cfi_rel_offset x8, 328

    stp x9, x10,  [sp, #336]
    .cfi_rel_offset x9, 336
    .cfi_rel_offset x10, 344

    stp x11, x12, [sp, #352]
    .cfi_rel_offset x11, 352
    .cfi_rel_offset x12, 360

    stp x13, x14, [sp, #368]
    .cfi_rel_offset x13, 368
    .cfi_rel_offset x14, 376

    stp x15, x16, [sp, #384]
    .cfi_rel_offset x15, 384
    .cfi_rel_offset x16, 392

    stp x17, x18, [sp, #400]
    .cfi_rel_offset x17, 400
    .cfi_rel_offset x18, 408

    stp x19, x20, [sp, #416]
    .cfi_rel_offset x19, 416
    .cfi_rel_offset x20, 424

    stp x21, x22, [sp, #432]
    .cfi_rel_offset x21, 432
    .cfi_rel_offset x22, 440

    stp x23, x24, [sp, #448]
    .cfi_rel_offset x23, 448
    .cfi_rel_offset x24, 456

    stp x25, x26, [sp, #464]
    .cfi_rel_offset x25, 464
    .cfi_rel_offset x26, 472

    stp x27, x28, [sp, #480]
    .cfi_rel_offset x27, 480
    .cfi_rel_offset x28, 488

    stp x29, xLR, [sp, #496]
    .cfi_rel_offset x29, 496
    .cfi_rel_offset x30, 504

    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
    ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    // Restore FP registers.
    // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
    ldr d0,       [sp, #8]
    ldp d1, d2,   [sp, #16]
    ldp d3, d4,   [sp, #32]
    ldp d5, d6,   [sp, #48]
    ldp d7, d8,   [sp, #64]
    ldp d9, d10,  [sp, #80]
    ldp d11, d12, [sp, #96]
    ldp d13, d14, [sp, #112]
    ldp d15, d16, [sp, #128]
    ldp d17, d18, [sp, #144]
    ldp d19, d20, [sp, #160]
    ldp d21, d22, [sp, #176]
    ldp d23, d24, [sp, #192]
    ldp d25, d26, [sp, #208]
    ldp d27, d28, [sp, #224]
    ldp d29, d30, [sp, #240]
    ldr d31,      [sp, #256]

    // Restore core registers.
    ldr x0,       [sp, #264]
    .cfi_restore x0

    ldp x1, x2,   [sp, #272]
    .cfi_restore x1
    .cfi_restore x2

    ldp x3, x4,   [sp, #288]
    .cfi_restore x3
    .cfi_restore x4

    ldp x5, x6,   [sp, #304]
    .cfi_restore x5
    .cfi_restore x6

    ldp x7, x8,   [sp, #320]
    .cfi_restore x7
    .cfi_restore x8

    ldp x9, x10,  [sp, #336]
    .cfi_restore x9
    .cfi_restore x10

    ldp x11, x12, [sp, #352]
    .cfi_restore x11
    .cfi_restore x12

    ldp x13, x14, [sp, #368]
    .cfi_restore x13
    .cfi_restore x14

    ldp x15, x16, [sp, #384]
    .cfi_restore x15
    .cfi_restore x16

    ldp x17, x18, [sp, #400]
    .cfi_restore x17
    .cfi_restore x18

    ldp x19, x20, [sp, #416]
    .cfi_restore x19
    .cfi_restore x20

    ldp x21, x22, [sp, #432]
    .cfi_restore x21
    .cfi_restore x22

    ldp x23, x24, [sp, #448]
    .cfi_restore x23
    .cfi_restore x24

    ldp x25, x26, [sp, #464]
    .cfi_restore x25
    .cfi_restore x26

    ldp x27, x28, [sp, #480]
    .cfi_restore x27
    .cfi_restore x28

    ldp x29, xLR, [sp, #496]
    .cfi_restore x29
    .cfi_restore x30

    add sp, sp, #512
    .cfi_adjust_cfa_offset -512
.endm

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz x0, 1f                // result non-zero branch over
    ret                        // return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz x0, 1f                 // result zero branch over
    ret                        // return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    mov x0, xSELF

    // Point of no return.
    b artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
    brk 0  // Unreached
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    ret
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
.endm

// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
.endm

.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
    cbnz w0, 1f                // result non-zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    b   \cxx_name                     // \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
    mov x1, xSELF                     // pass Thread::Current.
    b   \cxx_name                     // \cxx_name(arg, Thread*).
    brk 0
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    b   \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
    brk 0
END \c_name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception_from_signal, artThrowNullPointerExceptionFromSignal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code to create and deliver a NoSuchMethodError.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/x1.
     *
     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
     * of the target Method* in x0 and method->code_ in x1.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Adapted from ARM32 code.
     *
     * Clobbers xIP0.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    mov    x2, xSELF                      // pass Thread::Current
    mov    x3, sp
    bl     \cxx_name                      // (method_idx, this, Thread*, SP)
    mov    xIP0, x1                       // save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
    br     xIP0                           // tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


.macro INVOKE_STUB_CREATE_FRAME

SAVE_SIZE=15*8   // x4, x5, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
SAVE_SIZE_AND_METHOD=SAVE_SIZE+8


    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    add x10, x2, # SAVE_SIZE_AND_METHOD    // calculate size of frame.
    sub x10, sp, x10                       // Calculate SP position - saves + ArtMethod* + args
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    sub x10, x9, #SAVE_SIZE                // Calculate new FP (later). Done here as we must move SP
    .cfi_def_cfa_register x10              // before this.
    .cfi_adjust_cfa_offset SAVE_SIZE

    str x28, [x10, #112]
    .cfi_rel_offset x28, 112

    stp x26, x27, [x10, #96]
    .cfi_rel_offset x26, 96
    .cfi_rel_offset x27, 104

    stp x24, x25, [x10, #80]
    .cfi_rel_offset x24, 80
    .cfi_rel_offset x25, 88

    stp x22, x23, [x10, #64]
    .cfi_rel_offset x22, 64
    .cfi_rel_offset x23, 72

    stp x20, x21, [x10, #48]
    .cfi_rel_offset x20, 48
    .cfi_rel_offset x21, 56

    stp x9, x19, [x10, #32]                // Save old stack pointer and x19.
    .cfi_rel_offset sp, 32
    .cfi_rel_offset x19, 40

    stp x4, x5, [x10, #16]                 // Save result and shorty addresses.
    .cfi_rel_offset x4, 16
    .cfi_rel_offset x5, 24

    stp xFP, xLR, [x10]                    // Store LR & FP.
    .cfi_rel_offset x29, 0
    .cfi_rel_offset x30, 8

    mov xFP, x10                           // Use xFP now, as it's callee-saved.
    .cfi_def_cfa_register x29
    mov xSELF, x3                          // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                         // Destination is bottom of stack, past the null ArtMethod* slot.

    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
1:
    cmp w2, #0
    beq 2f
    sub w2, w2, #4      // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]

    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm

.macro INVOKE_STUB_CALL_AND_RETURN

    // load method-> METHOD_QUICK_CODE_OFFSET
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
    blr x9

    // Restore return value address and shorty address.
    ldp x4, x5, [xFP, #16]
    .cfi_restore x4
    .cfi_restore x5

    ldr x28, [xFP, #112]
    .cfi_restore x28

    ldp x26, x27, [xFP, #96]
    .cfi_restore x26
    .cfi_restore x27

    ldp x24, x25, [xFP, #80]
    .cfi_restore x24
    .cfi_restore x25

    ldp x22, x23, [xFP, #64]
    .cfi_restore x22
    .cfi_restore x23

    ldp x20, x21, [xFP, #48]
    .cfi_restore x20
    .cfi_restore x21

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x5]

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq 3f

    // Is it a double?
    cmp w10, #'D'
    bne 1f
    str d0, [x4]
    b 3f

1:  // Is it a float?
    cmp w10, #'F'
    bne 2f
    str s0, [x4]
    b 3f

2:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

3:  // Finish up.
    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
    .cfi_restore x19
    mov sp, x2
    .cfi_restore sp

    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
    .cfi_restore x29
    .cfi_restore x30

    ret

.endm


/*
 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 *  +----------------------+
 *  |                      |
 *  |  C/C++ frame         |
 *  |       LR''           |
 *  |       FP''           | <- SP'
 *  +----------------------+
 *  +----------------------+
 *  |        x28           | <- TODO: Remove callee-saves.
 *  |         :            |
 *  |        x19           |
 *  |        SP'           |
 *  |        X5            |
 *  |        X4            |        Saved registers
 *  |        LR'           |
 *  |        FP'           | <- FP
 *  +----------------------+
 *  | uint32_t out[n-1]    |
 *  |    :      :          |        Outs
 *  | uint32_t out[0]      |
 *  | ArtMethod*           | <- SP  value=null
 *  +----------------------+
 *
 * Outgoing registers:
 *  x0    - Method*
 *  x1-x7 - integer parameters.
 *  d0-d7 - Floating point parameters.
 *  xSELF = self
 *  SP = & of ArtMethod*
 *  x1 = "this" pointer.
 *
 */
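/*
 * Shorty reference (assumed encoding, not defined in this file): the first character is the
 * return type and the rest are the argument types - 'I' int, 'J' long, 'F' float, 'D' double,
 * 'L' reference, 'V' void. E.g. an instance method "double m(int, Object)" has shorty "DIL";
 * the implicit "this" is not encoded, which is why the stub below loads it separately before
 * entering the fill loop.
 */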
ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW2
    adr  x12, .LstoreX2
    adr  x13, .LstoreS0
    adr  x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1         // Load shorty address, plus one to skip return value.
    ldr w1, [x9],#4         // Load "this" parameter, and increment arg pointer.

    // Loop to fill registers.
.LfillRegisters:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F' // is this a float?
    bne .LisDouble

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble:
    cmp w17, #'D'           // is this a double?
    bne .LisLong

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'           // is this a long?
    bne .LisOther

    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther:                  // Everything else takes one vReg.
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4:
    add x9, x9, #4
    b .LfillRegisters

.Ladvance8:
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
.macro LOADREG counter size register return
    ldr \register , [x9], #\size
    add \counter, \counter, 12
    b \return
.endm
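
// For example, "LOADREG x8 4 w2 .LfillRegisters" expands to:
//     ldr w2, [x9], #4
//     add x8, x8, 12
//     b .LfillRegisters
// Each expansion is three 4-byte instructions (12 bytes), so the counter doubles as the byte
// offset of the next routine in the tables below - which is why the dispatch code above can
// jump via "add x17, <table base>, <counter>; br x17".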

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub

/*  extern"C"
 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW1_2
    adr  x12, .LstoreX1_2
    adr  x13, .LstoreS0_2
    adr  x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1     // Load shorty address, plus one to skip return value.

    // Loop to fill registers.
.LfillRegisters2:
    ldrb w17, [x10], #1         // Load next character in signature, and increment.
    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F'          // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble2:
    cmp w17, #'D'           // is this a double?
    bne .LisLong2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'           // is this a long?
    bne .LisOther2

    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther2:                 // Everything else takes one vReg.
    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub



/*  extern"C" void art_quick_osr_stub(void** stack,                x0
 *                                    size_t stack_size_in_bytes,  x1
 *                                    const uint8_t* native_pc,    x2
 *                                    JValue *result,              x3
 *                                    char   *shorty,              x4
 *                                    Thread *self)                x5
 */
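/*
 * Rough flow, inferred from the code below: save callee-saves like the invoke stubs, carve
 * stack_size_in_bytes out of the stack, copy the prepared frame image from *stack into it,
 * plant the return address in the slot the callee expects, then branch to native_pc to resume
 * in compiled code (on-stack replacement).
 */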
ENTRY art_quick_osr_stub
SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    sub x10, sp, # SAVE_SIZE
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    str x28, [sp, #112]
    stp x26, x27, [sp, #96]
    stp x24, x25, [sp, #80]
    stp x22, x23, [sp, #64]
    stp x20, x21, [sp, #48]
    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
    stp xFP, xLR, [sp]                    // Store LR & FP.
    mov xSELF, x5                         // Move thread pointer into SELF register.

    sub sp, sp, #16
    str xzr, [sp]                         // Store null for ArtMethod* slot
    // Branch to stub.
    bl .Losr_entry
    add sp, sp, #16

    // Restore return value address and shorty address.
    ldp x3, x4, [sp, #16]
    ldr x28, [sp, #112]
    ldp x26, x27, [sp, #96]
    ldp x24, x25, [sp, #80]
    ldp x22, x23, [sp, #64]
    ldp x20, x21, [sp, #48]

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x4]

    // Check the return type and store the correct register into the jvalue in memory.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq .Losr_exit

    // Is it a double?
    cmp w10, #'D'
    bne .Lno_double
    str d0, [x3]
    b .Losr_exit

.Lno_double:  // Is it a float?
    cmp w10, #'F'
    bne .Lno_float
    str s0, [x3]
    b .Losr_exit

.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x3]

.Losr_exit:  // Finish up.
    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
    mov sp, x2
    ret

.Losr_entry:
    // Update stack pointer for the callee
    sub sp, sp, x1

    // Update link register slot expected by the callee.
    sub w1, w1, #8
    str lr, [sp, x1]

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X0 - source address
    // W1 - args length
    // SP - destination address.
    // W10 - temporary
.Losr_loop_entry:
    cmp w1, #0
    beq .Losr_loop_exit
    sub w1, w1, #4
    ldr w10, [x0, x1]
    str w10, [sp, x1]
    b .Losr_loop_entry

.Losr_loop_exit:
    // Branch to the OSR entry point.
    br x2

END art_quick_osr_stub

    /*
     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
     */

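    /*
     * Context layout, read off the loads below: fprs_ holds d0 - d31 in order; gprs_ holds
     * x0 - x30 at indices 0-30, SP at index 31, a slot for the unused XZR at index 32, and
     * the target PC at index 33.
     */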
ENTRY art_quick_do_long_jump
    // Load FPRs
    ldp d0, d1, [x1], #16
    ldp d2, d3, [x1], #16
    ldp d4, d5, [x1], #16
    ldp d6, d7, [x1], #16
    ldp d8, d9, [x1], #16
    ldp d10, d11, [x1], #16
    ldp d12, d13, [x1], #16
    ldp d14, d15, [x1], #16
    ldp d16, d17, [x1], #16
    ldp d18, d19, [x1], #16
    ldp d20, d21, [x1], #16
    ldp d22, d23, [x1], #16
    ldp d24, d25, [x1], #16
    ldp d26, d27, [x1], #16
    ldp d28, d29, [x1], #16
    ldp d30, d31, [x1]

    // Load GPRs
    // TODO: lots of those are smashed, could optimize.
    add x0, x0, #30*8
    ldp x30, x1, [x0], #-16          // LR & SP
    ldp x28, x29, [x0], #-16
    ldp x26, x27, [x0], #-16
    ldp x24, x25, [x0], #-16
    ldp x22, x23, [x0], #-16
    ldp x20, x21, [x0], #-16
    ldp x18, x19, [x0], #-16
    ldp x16, x17, [x0], #-16
    ldp x14, x15, [x0], #-16
    ldp x12, x13, [x0], #-16
    ldp x10, x11, [x0], #-16
    ldp x8, x9, [x0], #-16
    ldp x6, x7, [x0], #-16
    ldp x4, x5, [x0], #-16
    ldp x2, x3, [x0], #-16
    mov sp, x1

    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
    ldr x1, [x0, #33*8]
    // And the value of x0.
    ldr x0, [x0]

    br  x1
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
     * possibly null object to lock.
     *
     * Derived from arm32 code.
     */
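    /*
     * Thin-lock fast path sketch (lock word layout assumed from the bit twiddling below: state
     * in the top bits, gc/read-barrier bits under it, then a recursion count, and the owner
     * thread id in the low 16 bits):
     *   unlocked      -> stxr in our thread id, preserving the read barrier bits;
     *   owned by us   -> bump the count, unless it would overflow into the gc bits;
     *   otherwise     -> contended or inflated, call artLockObjectFromCode.
     */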
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    cbz    w0, .Lslow_lock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_lock:
    ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
    ldxr   w1, [x4]
    mov    x3, x1
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cbnz   w3, .Lnot_unlocked         // already thin locked
    // unlocked case - x1: original lock word that's zero except for the read barrier bits.
    orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    dmb    ishld                      // full (LoadLoad|LoadStore) memory barrier
    ret
.Lnot_unlocked:  // x1: original lock word
    lsr    w3, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w3, .Lslow_lock            // if either of the top two bits are set, go slow path
    eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w2, w2                     // zero top 16 bits
    cbnz   w2, .Lslow_lock            // lock word and self thread id's match -> recursive lock
                                      // else contention, go to slow path
    mov    x3, x1                     // copy the lock word to check count overflow.
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
    add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
    cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
    add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Llock_stxr_fail:
    b      .Lretry_lock               // retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object

ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * x0 holds the possibly null object to unlock.
     *
     * Derived from arm32 code.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    x0, .Lslow_unlock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    w1, [x4]
#else
    ldxr   w1, [x4]                   // Need to use atomic instructions for read barrier
#endif
    lsr    w2, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
    ldr    w2, [xSELF, #THREAD_ID_OFFSET]
    mov    x3, x1                     // copy lock word to check thread id equality
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w3, w3                     // zero top 16 bits
    cbnz   w3, .Lslow_unlock          // do lock word and self thread id's match?
    mov    x3, x1                     // copy lock word to detect transition to unlocked
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    // transition to unlocked
    mov    x3, x1
    and    w3, w3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved read barrier bits
    dmb    ish                        // full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
    str    w3, [x4]
#else
    stxr   w2, w3, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lrecursive_thin_unlock:  // w1: original lock word
    sub    w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
#ifndef USE_READ_BARRIER
    str    w1, [x4]
#else
    stxr   w2, w1, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lunlock_stxr_fail:
    b      .Lretry_unlock               // retry
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
     * artThrowClassCastException.
     */
    .extern artThrowClassCastException
ENTRY art_quick_check_cast
    // Store arguments and link register
    // Stack needs to be 16B aligned on calls.
    stp x0, x1, [sp,#-32]!
    .cfi_adjust_cfa_offset 32
    .cfi_rel_offset x0, 0
    .cfi_rel_offset x1, 8
    str xLR, [sp, #24]
    .cfi_rel_offset x30, 24

    // Call runtime code
    bl artIsAssignableFromCode

    // Check for exception
    cbz x0, .Lthrow_class_cast_exception

    // Restore and return
    ldr xLR, [sp, #24]
    .cfi_restore x30
    ldp x0, x1, [sp], #32
    .cfi_restore x0
    .cfi_restore x1
    .cfi_adjust_cfa_offset -32
    ret

    .cfi_adjust_cfa_offset 32         // Reset unwind info so following code unwinds.

.Lthrow_class_cast_exception:
    // Restore
    ldr xLR, [sp, #24]
    .cfi_restore x30
    ldp x0, x1, [sp], #32
    .cfi_restore x0
    .cfi_restore x1
    .cfi_adjust_cfa_offset -32

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    b artThrowClassCastException      // (Class*, Class*, Thread*)
    brk 0                             // We should not return here...
END art_quick_check_cast

// Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
.macro POP_REG_NE xReg, offset, xExclude
    .ifnc \xReg, \xExclude
        ldr \xReg, [sp, #\offset]     // restore xReg
        .cfi_restore \xReg
    .endif
.endm

// Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
// Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
.macro POP_REGS_NE xReg1, xReg2, offset, xExclude
    .ifc \xReg1, \xExclude
        ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
    .else
        .ifc \xReg2, \xExclude
            ldr \xReg1, [sp, #\offset]          // restore xReg1
        .else
            ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
        .endif
    .endif
    .cfi_restore \xReg1
    .cfi_restore \xReg2
.endm
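
// For example, "POP_REG_NE x2, 16, x3" expands to "ldr x2, [sp, #16]" plus its CFI note, while
// "POP_REG_NE x2, 16, x2" expands to nothing: the excluded register already holds the value the
// caller wants to keep (the read barrier result in the macro below).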

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * xDest, wDest and xObj are registers, offset is a defined literal such as
     * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle
     * name mismatch between instructions. This macro uses the lower 32b of register when possible.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
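    /*
     * Pseudocode sketch of the macro below (Baker configuration):
     *   if (xObj->lock_word has the read barrier state bit set)
     *     wDest = artReadBarrierSlow(ref, xObj, offset);  // spills x0-x4 and LR around the call
     *   else
     *     wDest = *(xObj + offset);  // the "add \xObj, \xObj, \xTemp, lsr #32" only creates a
     *                                // false address dependency in place of a load/load fence
     */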
.macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
#ifdef USE_READ_BARRIER
#ifdef USE_BAKER_READ_BARRIER
    ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
    // False dependency to avoid needing load/load fence.
    add \xObj, \xObj, \xTemp, lsr #32
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
    b .Lrb_exit\number
#endif
.Lrb_slowpath\number:
    // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
    stp x0, x1, [sp, #-48]!
    .cfi_adjust_cfa_offset 48
    .cfi_rel_offset x0, 0
    .cfi_rel_offset x1, 8
    stp x2, x3, [sp, #16]
    .cfi_rel_offset x2, 16
    .cfi_rel_offset x3, 24
    stp x4, xLR, [sp, #32]
    .cfi_rel_offset x4, 32
    .cfi_rel_offset x30, 40

    // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
    .ifnc \xObj, x1
        mov x1, \xObj               // pass xObj
    .endif
    mov w2, #\offset                // pass offset
    bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
    // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \wDest, w0
        mov \wDest, w0              // save return value in wDest
    .endif

    // Conditionally restore saved registers
    POP_REG_NE x0, 0, \xDest
    POP_REG_NE x1, 8, \xDest
    POP_REG_NE x2, 16, \xDest
    POP_REG_NE x3, 24, \xDest
    POP_REG_NE x4, 32, \xDest
    ldr xLR, [sp, #40]
    .cfi_restore x30
    add sp, sp, #48
    .cfi_adjust_cfa_offset -48
.Lrb_exit\number:
#else
    ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
    UNPOISON_HEAP_REF \wDest
#endif  // USE_READ_BARRIER
.endm

    /*
     * Entry from managed code for array put operations of objects where the value being stored
     * needs to be checked for compatibility.
     * x0 = array, x1 = index, x2 = value
     *
     * Currently all values should fit into w0/w1/w2, and w1 always will as indices are 32b. We
     * assume, though, that the upper 32b are zeroed out. At least for x1/w1 we can do better by
     * using index-zero-extension in load/stores.
     *
     * Temporaries: x3, x4
     * TODO: x4 OK? ip seems wrong here.
     */
ENTRY art_quick_aput_obj_with_null_and_bound_check
    tst x0, x0
    bne art_quick_aput_obj_with_bound_check
    b art_quick_throw_null_pointer_exception
END art_quick_aput_obj_with_null_and_bound_check

ENTRY art_quick_aput_obj_with_bound_check
    ldr w3, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]
    cmp w3, w1
    bhi art_quick_aput_obj
    mov x0, x1
    mov x1, x3
    b art_quick_throw_array_bounds
END art_quick_aput_obj_with_bound_check

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
ENTRY art_quick_aput_obj
    cbz x2, .Ldo_aput_null
    READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
                                                                    // This also zero-extends to x3
    READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
    // This also zero-extends to x3
    READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
                                                                    // This also zero-extends to x4
    cmp w3, w4  // value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
                                                         // "Compress" = do nothing
    POISON_HEAP_REF w2
    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
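    // Card marking (assumed scheme): the card table base is biased so that its own low byte is
    // the dirty-card value, so storing w3's low byte at card_table[x0 >> 7] marks the card.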
1579    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
1580    lsr x0, x0, #7
1581    strb w3, [x3, x0]
1582    ret
1583.Ldo_aput_null:
1584    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
1585                                                         // "Compress" = do nothing
1586    str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
1587    ret
1588.Lcheck_assignability:
1589    // Store arguments and link register
1590    stp x0, x1, [sp,#-32]!
1591    .cfi_adjust_cfa_offset 32
1592    .cfi_rel_offset x0, 0
1593    .cfi_rel_offset x1, 8
1594    stp x2, xLR, [sp, #16]
1595    .cfi_rel_offset x2, 16
1596    .cfi_rel_offset x30, 24
1597
1598    // Call runtime code
1599    mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
1600    mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
1601    bl artIsAssignableFromCode
1602
1603    // Check for exception
1604    cbz x0, .Lthrow_array_store_exception
1605
1606    // Restore
1607    ldp x2, x30, [sp, #16]
1608    .cfi_restore x2
1609    .cfi_restore x30
1610    ldp x0, x1, [sp], #32
1611    .cfi_restore x0
1612    .cfi_restore x1
1613    .cfi_adjust_cfa_offset -32
1614
1615    add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
1616                                                          // "Compress" = do nothing
1617    POISON_HEAP_REF w2
1618    str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
1619    ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
1620    lsr x0, x0, #7
1621    strb w3, [x3, x0]
1622    ret
1623    .cfi_adjust_cfa_offset 32  // 4 restores after cbz for unwinding.
1624.Lthrow_array_store_exception:
1625    ldp x2, x30, [sp, #16]
1626    .cfi_restore x2
1627    .cfi_restore x30
1628    ldp x0, x1, [sp], #32
1629    .cfi_restore x0
1630    .cfi_restore x1
1631    .cfi_adjust_cfa_offset -32
1632
1633    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
1634    mov x1, x2                    // Pass value.
1635    mov x2, xSELF                 // Pass Thread::Current.
1636    b artThrowArrayStoreException // (Object*, Object*, Thread*).
1637    brk 0                         // Unreached.
1638END art_quick_aput_obj
1639
1640// Macro to facilitate adding new allocation entrypoints.
1641.macro ONE_ARG_DOWNCALL name, entrypoint, return
1642    .extern \entrypoint
1643ENTRY \name
1644    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1645    mov    x1, xSELF                  // pass Thread::Current
1646    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
1647    RESTORE_SAVE_REFS_ONLY_FRAME
1648    \return
1649END \name
1650.endm
1651
1652// Macro to facilitate adding new allocation entrypoints.
1653.macro TWO_ARG_DOWNCALL name, entrypoint, return
1654    .extern \entrypoint
1655ENTRY \name
1656    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1657    mov    x2, xSELF                  // pass Thread::Current
1658    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
1659    RESTORE_SAVE_REFS_ONLY_FRAME
1660    \return
1661END \name
1662.endm
1663
1664// Macro to facilitate adding new allocation entrypoints.
1665.macro THREE_ARG_DOWNCALL name, entrypoint, return
1666    .extern \entrypoint
1667ENTRY \name
1668    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1669    mov    x3, xSELF                  // pass Thread::Current
1670    bl     \entrypoint
1671    RESTORE_SAVE_REFS_ONLY_FRAME
1672    \return
1673END \name
1674.endm
1675
1676// Macro to facilitate adding new allocation entrypoints.
1677.macro FOUR_ARG_DOWNCALL name, entrypoint, return
1678    .extern \entrypoint
1679ENTRY \name
1680    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1681    mov    x4, xSELF                  // pass Thread::Current
1682    bl     \entrypoint
1683    RESTORE_SAVE_REFS_ONLY_FRAME
1684    \return
1685    DELIVER_PENDING_EXCEPTION
1686END \name
1687.endm
1688
1689// Macros that exploit the code similarities among downcalls that take a referrer.
1690.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
1691    .extern \entrypoint
1692ENTRY \name
1693    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1694    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1695    mov    x2, xSELF                  // pass Thread::Current
1696    bl     \entrypoint                // (uint32_t field_idx, ArtMethod* referrer, Thread*)
1697    RESTORE_SAVE_REFS_ONLY_FRAME
1698    \return
1699END \name
1700.endm
1701
1702.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
1703    .extern \entrypoint
1704ENTRY \name
1705    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1706    ldr    x2, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1707    mov    x3, xSELF                  // pass Thread::Current
1708    bl     \entrypoint
1709    RESTORE_SAVE_REFS_ONLY_FRAME
1710    \return
1711END \name
1712.endm
1713
1714.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
1715    .extern \entrypoint
1716ENTRY \name
1717    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1718    ldr    x3, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1719    mov    x4, xSELF                  // pass Thread::Current
1720    bl     \entrypoint
1721    RESTORE_SAVE_REFS_ONLY_FRAME
1722    \return
1723END \name
1724.endm
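
    /*
     * Shape of the *_REF_DOWNCALL expansions above, as illustrative C (names
     * hypothetical). The referrer is the managed caller's ArtMethod*, read
     * from the stack slot just above the callee-save frame built here.
     *
     *   uint64_t RefDowncall(uint32_t a, uint32_t b, Thread* self) {
     *     SetupSaveRefsOnlyFrame();                     // GC may move refs
     *     ArtMethod* referrer =
     *         *(ArtMethod**)(sp + FRAME_SIZE_SAVE_REFS_ONLY);
     *     uint64_t result = entrypoint(a, b, referrer, self);
     *     RestoreSaveRefsOnlyFrame();
     *     return result;                                // then \return runs
     *   }
     */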
1725
1726.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1727    cbz w0, 1f                 // result zero branch over
1728    ret                        // return
17291:
1730    DELIVER_PENDING_EXCEPTION
1731.endm
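
    /*
     * Contract of the macro above, as illustrative C: reference-returning
     * entrypoints report failure by returning 0 after storing an exception in
     * the thread.
     *
     *   if (w0 != 0) return w0;       // success: w0 holds the new reference
     *   DeliverPendingException();    // w0 == 0: raise the stored exception
     */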
1732
1733    /*
1734     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers an exception on
1735     * failure.
1736     */
1737TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1738
1739    /*
1740     * Entry from managed code when static storage is uninitialized. This stub runs the class
1741     * initializer and delivers the exception on error. On success the static storage base is
1742     * returned.
1743     */
1744ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1745
1746ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1747ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1748
1749ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1750ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1751ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1752ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1753ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1754ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1755ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1756
1757TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1758TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1759TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1760TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1761TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1762TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1763TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
1764
1765TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1766TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1767TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1768TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1769
1770THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1771THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1772THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1773THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1774THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
1775
1776// This is separated out as the argument order is different.
1777    .extern artSet64StaticFromCode
1778ENTRY art_quick_set64_static
1779    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
1780    ldr    x1, [sp, #FRAME_SIZE_SAVE_REFS_ONLY] // Load referrer
1781                                      // x2 contains the parameter
1782    mov    x3, xSELF                  // pass Thread::Current
1783    bl     artSet64StaticFromCode
1784    RESTORE_SAVE_REFS_ONLY_FRAME
1785    RETURN_IF_W0_IS_ZERO_OR_DELIVER
1786END art_quick_set64_static
1787
1788    /*
1789     * Entry from managed code to resolve a string. This stub
1790     * checks the dex cache for a matching string (the fast path), and if not found,
1791     * it will allocate a String and deliver an exception on error.
1792     * On success the String is returned. W0 holds the string index.
1793     */
1794
1795ENTRY art_quick_resolve_string
1796    ldr   x1, [sp]                                               // load referrer
1797    ldr   w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET]           // load declaring class
1798    ldr   x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]    // load string dex cache
1799    and   x2, x0, #STRING_DEX_CACHE_SIZE_MINUS_ONE               // get masked string index into x2
1800    ldr   x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x2
1801    cmp   x0, x2, lsr #32                                         // compare against upper 32 bits
1802    bne   .Lart_quick_resolve_string_slow_path
1803    ubfx  x0, x2, #0, #32                                        // extract lower 32 bits into x0
1804#ifdef USE_READ_BARRIER
1805    // Most common case: GC is not marking.
1806    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
1807    cbnz   x3, .Lart_quick_resolve_string_marking
1808#endif
1809    ret
1810
1811// Slow path case: the index did not match.
1812.Lart_quick_resolve_string_slow_path:
1813    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
1814    mov   x1, xSELF                                 // pass Thread::Current
1815    bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
1816    RESTORE_SAVE_REFS_ONLY_FRAME
1817    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1818
1819// GC-is-marking case: need to check the mark bit.
1820.Lart_quick_resolve_string_marking:
1821    ldr   x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
1822    tbnz  x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
1823    // Save LR so that we can return, also x1 for alignment purposes.
1824    stp    x1, xLR, [sp, #-16]!                     // Save x1, LR.
1825    bl     artReadBarrierMark                       // Get the marked string back.
1826    ldp    x1, xLR, [sp], #16                       // Restore registers.
1827.Lart_quick_resolve_string_no_rb:
1828    ret
1829
1830END art_quick_resolve_string
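
    /*
     * Fast path above as illustrative C, assuming each dex cache entry is a
     * packed 64-bit pair: compressed String reference in the low 32 bits and
     * string index in the high 32 bits. Names are hypothetical.
     *
     *   uint32_t ResolveStringFast(ArtMethod* referrer, uint32_t idx) {
     *     uint64_t* cache = referrer->declaring_class->dex_cache_strings;
     *     uint64_t pair = cache[idx & STRING_DEX_CACHE_SIZE_MINUS_ONE];
     *     if ((uint32_t)(pair >> 32) != idx) return SlowPath(idx);
     *     return (uint32_t)pair;  // compressed ref; may still need marking
     *   }
     */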
1831
1832// Generate the allocation entrypoints for each allocator.
1833GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_REGION_TLAB_ALLOCATORS
1834// Comment out allocators that have arm64 specific asm.
1835// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB) implemented in asm
1836// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
1837// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
1838GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1839// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB) implemented in asm
1840// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
1841GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1842GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
1843GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
1844GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
1845GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
1846GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
1847
1848// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
1849ENTRY art_quick_alloc_object_rosalloc
1850    // Fast path rosalloc allocation.
1851    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
1852    // x2-x7: free.
1853    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
1854                                                              // Load the class (x2)
1855    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
1856    cbz    x2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
1857                                                              // Check class status.
1858    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]
1859    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
1860    bne    .Lart_quick_alloc_object_rosalloc_slow_path
1861                                                              // Add a fake dependence from the
1862                                                              // following access flag and size
1863                                                              // loads to the status load.
1864                                                              // This is to prevent those loads
1865                                                              // from being reordered above the
1866                                                              // status load and reading wrong
1867                                                              // values (an alternative is to use
1868                                                              // a load-acquire for the status).
1869    eor    x3, x3, x3
1870    add    x2, x2, x3
1871                                                              // Check access flags has
1872                                                              // kAccClassIsFinalizable
1873    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
1874    tst    x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
1875    bne    .Lart_quick_alloc_object_rosalloc_slow_path
1876    ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
1877                                                              // allocation stack has room.
1878                                                              // ldp won't work due to large offset.
1879    ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
1880    cmp    x3, x4
1881    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
1882    ldr    w3, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x3)
1883    cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
1884                                                              // local allocation
1885    bhs    .Lart_quick_alloc_object_rosalloc_slow_path
1886                                                              // Compute the rosalloc bracket index
1887                                                              // from the size.
1888                                                              // Round the size up to the rosalloc
1889                                                              // bracket quantum, divide by the
1890                                                              // quantum size, and subtract 1.
1891                                                              // (size - 1) >> shift is a shorter,
1892                                                              // equivalent computation.
1893    sub    x3, x3, #1
1894    lsr    x3, x3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
1895                                                              // Load the rosalloc run (x4)
1896    add    x4, xSELF, x3, lsl #POINTER_SIZE_SHIFT
1897    ldr    x4, [x4, #THREAD_ROSALLOC_RUNS_OFFSET]
1898                                                              // Load the free list head (x3). This
1899                                                              // will be the return val.
1900    ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1901    cbz    x3, .Lart_quick_alloc_object_rosalloc_slow_path
1902    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
1903    ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
1904                                                              // and update the list head with the
1905                                                              // next pointer.
1906    str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
1907                                                              // Store the class pointer in the
1908                                                              // header. This also overwrites the
1909                                                              // next pointer. The offsets are
1910                                                              // asserted to match.
1911#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
1912#error "Class pointer needs to overwrite next pointer."
1913#endif
1914    POISON_HEAP_REF w2
1915    str    w2, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
1916                                                              // Fence. This is "ish" not "ishst" so
1917                                                              // that it also ensures ordering of
1918                                                              // the class status load with respect
1919                                                              // to later accesses to the class
1920                                                              // object. Alternatively we could use
1921                                                              // "ishst" if we use load-acquire for
1922                                                              // the class status load.
1923                                                              // Needs to be done before pushing on
1924                                                              // allocation since Heap::VisitObjects
1925                                                              // relies on seeing the class pointer.
1926                                                              // b/28790624
1927    dmb    ish
1928                                                              // Push the new object onto the thread
1929                                                              // local allocation stack and
1930                                                              // increment the thread local
1931                                                              // allocation stack top.
1932    ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1933    str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
1934    str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
1935                                                              // Decrement the size of the free list
1936    ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1937    sub    x1, x1, #1
1938                                                              // TODO: consider combining this store
1939                                                              // and the list head store above using
1940                                                              // stp.
1941    str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
1942
1943    mov    x0, x3                                             // Set the return value and return.
1944    ret
1945.Lart_quick_alloc_object_rosalloc_slow_path:
1946    SETUP_SAVE_REFS_ONLY_FRAME             // save callee saves in case of GC
1947    mov    x2, xSELF                       // pass Thread::Current
1948    bl     artAllocObjectFromCodeRosAlloc  // (uint32_t type_idx, Method* method, Thread*)
1949    RESTORE_SAVE_REFS_ONLY_FRAME
1950    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
1951END art_quick_alloc_object_rosalloc
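
    /*
     * Free-list pop above as illustrative C (field names hypothetical). The
     * bracket-index shortcut used by the asm holds for size >= 1:
     * (size - 1) >> shift == RoundUp(size, 1 << shift) / (1 << shift) - 1.
     *
     *   void* RosAllocFast(Thread* self, size_t bracket_idx) {
     *     Run* run = self->rosalloc_runs[bracket_idx];
     *     Slot* slot = run->free_list.head;
     *     if (slot == NULL) return NULL;       // take the slow path
     *     run->free_list.head = slot->next;    // pop the head
     *     run->free_list.size--;
     *     return slot;   // the class pointer store overwrites slot->next
     *   }
     */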
1952
1953
1954// The common fast path code for art_quick_alloc_array_region_tlab.
1955.macro ALLOC_ARRAY_TLAB_FAST_PATH slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1956    // Check null class
1957    cbz    \wClass, \slowPathLabel
1958    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED \slowPathLabel, \xClass, \wClass, \xCount, \wCount, \xTemp0, \wTemp0, \xTemp1, \wTemp1, \xTemp2, \wTemp2
1959.endm
1960
1961// The common fast path code for art_quick_alloc_array_region_tlab.
1962.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
1963    // Array classes are never finalizable or uninitialized, so there is no need to check.
1964    ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
1965    UNPOISON_HEAP_REF \wTemp0
1966    ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
1967    lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
1968                                                              // bits.
1969                                                              // xCount holds a 32-bit value, so
1970                                                              // the shift cannot overflow.
1971    lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
1972    // Add array data offset and alignment.
1973    add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
1974#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
1975#error Long array data offset must be 4 greater than int array data offset.
1976#endif
1977
1978    add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
1979                                                              // component size shift is 3
1980                                                              // (for 64 bit alignment).
1981    and    \xTemp0, \xTemp0, #4
1982    add    \xTemp1, \xTemp1, \xTemp0
1983    and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
1984                                                              // (addr + 7) & ~7. The mask must
1985                                                              // be 64 bits to keep high bits in
1986                                                              // case of overflow.
1987    // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
1988    // Negative ints become large 64 bit unsigned ints which will always be larger than max signed
1989    // 32 bit int. Since the max shift for arrays is 3, it can not become a negative 64 bit int.
1990    cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
1991    bhs    \slowPathLabel                                     // path.
1992
1993    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
1994                                                              // we use (end - begin) to handle
1995                                                              // negative size arrays. It is
1996                                                              // assumed that a negative size will
1997                                                              // always be greater unsigned than
1998                                                              // region size.
1999    ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
2000    sub    \xTemp2, \xTemp2, \xTemp0
2001    cmp    \xTemp1, \xTemp2
2002    bhi    \slowPathLabel
2003    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
2004                                                              // Move old thread_local_pos to x0
2005                                                              // for the return value.
2006    mov    x0, \xTemp0
2007    add    \xTemp0, \xTemp0, \xTemp1
2008    str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
2009    ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
2010    add    \xTemp0, \xTemp0, #1
2011    str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
2012    POISON_HEAP_REF \wClass
2013    str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
2014    str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
2015                                                              // Fence.
2016    dmb    ishst
2017    ret
2018.endm
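
    /*
     * Size computation above as illustrative C. count is zero-extended to 64
     * bits, so a negative length becomes a huge unsigned value and reliably
     * takes the large-object slow path.
     *
     *   uint64_t ArrayAllocSize(uint32_t count, uint32_t shift) { // shift 0..3
     *     uint64_t size = ((uint64_t)count << shift)
     *                   + MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK;
     *     size += (shift + 1) & 4;     // +4 only when shift == 3 (64-bit data)
     *     return size & OBJECT_ALIGNMENT_MASK_TOGGLED64;   // (x + 7) & ~7
     *   }
     */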
2019
2020// The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
2021//
2022// x0: type_idx/return value, x1: ArtMethod*, x2: Class*, xSELF(x19): Thread::Current
2023// x3-x7: free.
2024// Need to preserve x0 and x1 for the slow path.
2025.macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
2026    cbz    x2, \slowPathLabel                                 // Check null class
2027    ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED \slowPathLabel
2028.endm
2029
2030.macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
2031    ldr    w3, [x2, #MIRROR_CLASS_STATUS_OFFSET]              // Check class status.
2032    cmp    x3, #MIRROR_CLASS_STATUS_INITIALIZED
2033    bne    \slowPathLabel
2034                                                              // Add a fake dependence from the
2035                                                              // following access flag and size
2036                                                              // loads to the status load.
2037                                                              // This is to prevent those loads
2038                                                              // from being reordered above the
2039                                                              // status load and reading wrong
2040                                                              // values (an alternative is to use
2041                                                              // a load-acquire for the status).
2042    eor    x3, x3, x3
2043    add    x2, x2, x3
2044    ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED \slowPathLabel
2045.endm
2046
2047.macro ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED slowPathLabel
2048                                                              // Check access flags has
2049                                                              // kAccClassIsFinalizable.
2050    ldr    w3, [x2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
2051    tbnz   x3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT, \slowPathLabel
2052                                                              // Load thread_local_pos (x4) and
2053                                                              // thread_local_end (x5).
2054    ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
2055    ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
2056    sub    x6, x5, x4                                         // Compute the remaining buf size.
2057    ldr    w7, [x2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (x7).
2058    cmp    x7, x6                                             // Check if it fits. OK to do this
2059                                                              // before rounding up the object size
2060                                                              // assuming the buf size alignment.
2061    bhi    \slowPathLabel
2062    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
2063                                                              // Round up the object size by the
2064                                                              // object alignment. (addr + 7) & ~7.
2065    add    x7, x7, #OBJECT_ALIGNMENT_MASK
2066    and    x7, x7, #OBJECT_ALIGNMENT_MASK_TOGGLED
2067                                                              // Move old thread_local_pos to x0
2068                                                              // for the return value.
2069    mov    x0, x4
2070    add    x5, x0, x7
2071    str    x5, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
2072    ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
2073    add    x5, x5, #1
2074    str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
2075    POISON_HEAP_REF w2
2076    str    w2, [x0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
2077                                                              // Fence. This is "ish" not "ishst" so
2078                                                              // that the code after this allocation
2079                                                              // site will see the right values in
2080                                                              // the fields of the class.
2081                                                              // Alternatively we could use "ishst"
2082                                                              // if we use load-acquire for the
2083                                                              // class status load.
2084    dmb    ish
2085    ret
2086.endm
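
    /*
     * TLAB bump-pointer fast path above as illustrative C (field names
     * hypothetical). Comparing before rounding up is safe because
     * thread_local_end is itself object-aligned.
     *
     *   void* TlabAllocObject(Thread* self, Class* klass, uint64_t size) {
     *     if (size > (uint64_t)(self->thread_local_end - self->thread_local_pos))
     *       return NULL;                         // take the slow path
     *     size = (size + 7) & ~(uint64_t)7;      // round up to 8 bytes
     *     Object* obj = (Object*)self->thread_local_pos;
     *     self->thread_local_pos += size;
     *     self->thread_local_objects++;
     *     obj->klass = klass;                    // then dmb ish, see above
     *     return obj;
     *   }
     */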
2087
2088// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
2089ENTRY art_quick_alloc_object_tlab
2090    // Fast path tlab allocation.
2091    // x0: type_idx/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
2092    // x2-x7: free.
2093#if defined(USE_READ_BARRIER)
2094    mvn    x0, xzr                                            // Read barrier not supported here.
2095    ret                                                       // Return -1.
2096#endif
2097    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
2098                                                              // Load the class (x2)
2099    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
2100    ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
2101.Lart_quick_alloc_object_tlab_slow_path:
2102    SETUP_SAVE_REFS_ONLY_FRAME           // Save callee saves in case of GC.
2103    mov    x2, xSELF                     // Pass Thread::Current.
2104    bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
2105    RESTORE_SAVE_REFS_ONLY_FRAME
2106    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
2107END art_quick_alloc_object_tlab
2108
2109// The common code for art_quick_alloc_object_*region_tlab
2110.macro GENERATE_ALLOC_OBJECT_REGION_TLAB name, entrypoint, fast_path, is_resolved
2111ENTRY \name
2112    // Fast path region tlab allocation.
2113    // x0: type_idx/resolved class/return value, x1: ArtMethod*, xSELF(x19): Thread::Current
2114    // If is_resolved is 1 then x0 is the resolved type, otherwise it is the index.
2115    // x2-x7: free.
2116#if !defined(USE_READ_BARRIER)
2117    mvn    x0, xzr                                            // Read barrier must be enabled here.
2118    ret                                                       // Return -1.
2119#endif
2120.if \is_resolved
2121    mov    x2, x0 // class is actually stored in x0 already
2122.else
2123    ldr    x2, [x1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
2124                                                              // Load the class (x2)
2125    ldr    w2, [x2, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
2126.endif
2127    // Most common case: GC is not marking.
2128    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
2129    cbnz   x3, .Lmarking\name
2130.Ldo_allocation\name:
2131    \fast_path .Lslow_path\name
2132.Lmarking\name:
2133    // GC is marking, check the lock word of the class for the mark bit.
2134    // If the class is null, go slow path. The check is required to read the lock word.
2135    cbz    w2, .Lslow_path\name
2136    // Class is not null, check mark bit in lock word.
2137    ldr    w3, [x2, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2138    // If the bit is not zero, do the allocation.
2139    tbnz    w3, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
2140                                                              // The read barrier slow path. Mark
2141                                                              // the class.
2142    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, lr).
2143    str    xLR, [sp, #16]                                     // Save lr; 32-byte frame keeps sp 16-byte aligned.
2144    mov    x0, x2                                             // Pass the class as the first param.
2145    bl     artReadBarrierMark
2146    mov    x2, x0                                             // Get the (marked) class back.
2147    ldp    x0, x1, [sp, #0]                                   // Restore registers.
2148    ldr    xLR, [sp, #16]
2149    add    sp, sp, #32
2150    b      .Ldo_allocation\name
2151.Lslow_path\name:
2152    SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
2153    mov    x2, xSELF                           // Pass Thread::Current.
2154    bl     \entrypoint                         // (uint32_t type_idx, Method* method, Thread*)
2155    RESTORE_SAVE_REFS_ONLY_FRAME
2156    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
2157END \name
2158.endm
2159
2160GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_region_tlab, artAllocObjectFromCodeRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH, 0
2161GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED, 1
2162GENERATE_ALLOC_OBJECT_REGION_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, ALLOC_OBJECT_TLAB_FAST_PATH_INITIALIZED, 1
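
    /*
     * Marking check shared by the region TLAB entrypoints above, as
     * illustrative C: while the concurrent copying GC is marking, the class
     * must itself be marked (mark bit set in its lock word) before it is
     * installed into a newly allocated object.
     *
     *   if (self->is_gc_marking) {
     *     if (klass == NULL) goto slow_path;
     *     if (!(klass->lock_word & (1u << LOCK_WORD_MARK_BIT_SHIFT)))
     *       klass = artReadBarrierMark(klass);
     *   }
     *   // fall through to the TLAB fast path with the (marked) class
     */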
2163
2164// The common code for art_quick_alloc_array_*region_tlab
2165.macro GENERATE_ALLOC_ARRAY_REGION_TLAB name, entrypoint, fast_path, is_resolved
2166ENTRY \name
2167    // Fast path array allocation for region tlab allocation.
2168    // x0: uint32_t type_idx
2169    // x1: int32_t component_count
2170    // x2: ArtMethod* method
2171    // x3-x7: free.
2172#if !defined(USE_READ_BARRIER)
2173    mvn    x0, xzr                                            // Read barrier must be enabled here.
2174    ret                                                       // Return -1.
2175#endif
2176.if \is_resolved
2177    mov    x3, x0
2178    // If already resolved, class is stored in x0
2179.else
2180    ldr    x3, [x2, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_64]    // Load dex cache resolved types array
2181                                                              // Load the class (x3)
2182    ldr    w3, [x3, x0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
2183.endif
2184    // Most common case: GC is not marking.
2185    ldr    w4, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
2186    cbnz   x4, .Lmarking\name
2187.Ldo_allocation\name:
2188    \fast_path .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
2189.Lmarking\name:
2190    // GC is marking, check the lock word of the class for the mark bit.
2191    // If the class is null, go slow path. The check is required to read the lock word.
2192    cbz    w3, .Lslow_path\name
2193    // Class is not null, check mark bit in lock word.
2194    ldr    w4, [x3, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2195    // If the bit is not zero, do the allocation.
2196    tbnz   w4, #LOCK_WORD_MARK_BIT_SHIFT, .Ldo_allocation\name
2197                                                              // The read barrier slow path. Mark
2198                                                              // the class.
2199    stp    x0, x1, [sp, #-32]!                                // Save registers (x0, x1, x2, lr).
2200    stp    x2, xLR, [sp, #16]
2201    mov    x0, x3                                             // Pass the class as the first param.
2202    bl     artReadBarrierMark
2203    mov    x3, x0                                             // Get the (marked) class back.
2204    ldp    x2, xLR, [sp, #16]
2205    ldp    x0, x1, [sp], #32                                  // Restore registers.
2206    b      .Ldo_allocation\name
2207.Lslow_path\name:
2208    // x0: uint32_t type_idx / mirror::Class* klass (if resolved)
2209    // x1: int32_t component_count
2210    // x2: ArtMethod* method
2211    // x3: Thread* self
2212    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
2213    mov    x3, xSELF                  // pass Thread::Current
2214    bl     \entrypoint
2215    RESTORE_SAVE_REFS_ONLY_FRAME
2216    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
2217END \name
2218.endm
2219
2220GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_region_tlab, artAllocArrayFromCodeRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH, 0
2221// TODO: art_quick_alloc_array_resolved_region_tlab seems to not get called. Investigate compiler.
2222GENERATE_ALLOC_ARRAY_REGION_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED, 1
2223
2224    /*
2225     * Called by managed code when the thread has been asked to suspend.
2226     */
2227    .extern artTestSuspendFromCode
2228ENTRY art_quick_test_suspend
2229    SETUP_SAVE_EVERYTHING_FRAME               // save callee saves for stack crawl
2230    mov    x0, xSELF
2231    bl     artTestSuspendFromCode             // (Thread*)
2232    RESTORE_SAVE_EVERYTHING_FRAME
2233    ret
2234END art_quick_test_suspend
2235
2236ENTRY art_quick_implicit_suspend
2237    mov    x0, xSELF
2238    SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
2239    bl     artTestSuspendFromCode             // (Thread*)
2240    RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
2241END art_quick_implicit_suspend
2242
2243    /*
2244     * Called by managed code that is attempting to call a method on a proxy class. On entry
2245     * x0 holds the proxy method and x1 holds the receiver; the frame size of the invoked proxy
2246     * method agrees with a ref and args callee save frame.
2247     */
2248    .extern artQuickProxyInvokeHandler
2249ENTRY art_quick_proxy_invoke_handler
2250    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
2251    mov     x2, xSELF                   // pass Thread::Current
2252    mov     x3, sp                      // pass SP
2253    bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
2254    ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
2255    cbnz    x2, .Lexception_in_proxy    // branch if exception is pending
2256    RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
2257    fmov    d0, x0                      // Store result in d0 in case it was float or double
2258    ret                                 // return on success
2259.Lexception_in_proxy:
2260    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2261    DELIVER_PENDING_EXCEPTION
2262END art_quick_proxy_invoke_handler
2263
2264    /*
2265     * Called to resolve an imt conflict.
2266     * x0 is the conflict ArtMethod.
2267     * xIP1 is a hidden argument that holds the target interface method's dex method index.
2268     *
2269     * Note that this stub writes to xIP0, xIP1, and x0.
2270     */
2271    .extern artInvokeInterfaceTrampoline
2272ENTRY art_quick_imt_conflict_trampoline
2273    ldr xIP0, [sp, #0]  // Load referrer
2274    ldr xIP0, [xIP0, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_64]   // Load dex cache methods array
2275    ldr xIP0, [xIP0, xIP1, lsl #POINTER_SIZE_SHIFT]  // Load interface method
2276    ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
2277    ldr x0, [xIP1]  // Load first entry in ImtConflictTable.
2278.Limt_table_iterate:
2279    cmp x0, xIP0
2280    // Branch if found. Benchmarks have shown doing a branch here is better.
2281    beq .Limt_table_found
2282    // If the entry is null, the interface method is not in the ImtConflictTable.
2283    cbz x0, .Lconflict_trampoline
2284    // Iterate over the entries of the ImtConflictTable.
2285    ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
2286    b .Limt_table_iterate
2287.Limt_table_found:
2288    // We successfully hit an entry in the table. Load the target method
2289    // and jump to it.
2290    ldr x0, [xIP1, #__SIZEOF_POINTER__]
2291    ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
2292    br xIP0
2293.Lconflict_trampoline:
2294    // Call the runtime stub to populate the ImtConflictTable and jump to the
2295    // resolved method.
2296    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
2297END art_quick_imt_conflict_trampoline
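
    /*
     * Table walk above as illustrative C: an ImtConflictTable is a
     * null-terminated array of (interface method, implementation) pairs.
     *
     *   ArtMethod* ImtLookup(ArtMethod** table, ArtMethod* iface_method) {
     *     for (; table[0] != NULL; table += 2) {
     *       if (table[0] == iface_method) return table[1];
     *     }
     *     return NULL;  // miss: fall back to the conflict trampoline
     *   }
     */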
2298
2299ENTRY art_quick_resolution_trampoline
2300    SETUP_SAVE_REFS_AND_ARGS_FRAME
2301    mov x2, xSELF
2302    mov x3, sp
2303    bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
2304    cbz x0, 1f
2305    mov xIP0, x0            // Remember returned code pointer in xIP0.
2306    ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
2307    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2308    br xIP0
23091:
2310    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2311    DELIVER_PENDING_EXCEPTION
2312END art_quick_resolution_trampoline
2313
2314/*
2315 * Generic JNI frame layout:
2316 *
2317 * #-------------------#
2318 * |                   |
2319 * | caller method...  |
2320 * #-------------------#    <--- SP on entry
2321 * | Return X30/LR     |
2322 * | X29/FP            |    callee save
2323 * | X28               |    callee save
2324 * | X27               |    callee save
2325 * | X26               |    callee save
2326 * | X25               |    callee save
2327 * | X24               |    callee save
2328 * | X23               |    callee save
2329 * | X22               |    callee save
2330 * | X21               |    callee save
2331 * | X20               |    callee save
2332 * | X19               |    callee save
2333 * | X7                |    arg7
2334 * | X6                |    arg6
2335 * | X5                |    arg5
2336 * | X4                |    arg4
2337 * | X3                |    arg3
2338 * | X2                |    arg2
2339 * | X1                |    arg1
2340 * | D7                |    float arg 8
2341 * | D6                |    float arg 7
2342 * | D5                |    float arg 6
2343 * | D4                |    float arg 5
2344 * | D3                |    float arg 4
2345 * | D2                |    float arg 3
2346 * | D1                |    float arg 2
2347 * | D0                |    float arg 1
2348 * | Method*           | <- X0
2349 * #-------------------#
2350 * | local ref cookie  | // 4B
2351 * | handle scope size | // 4B
2352 * #-------------------#
2353 * | JNI Call Stack    |
2354 * #-------------------#    <--- SP on native call
2355 * |                   |
2356 * | Stack for Regs    |    The trampoline assembly will pop these values
2357 * |                   |    into registers for native call
2358 * #-------------------#
2359 * | Native code ptr   |
2360 * #-------------------#
2361 * | Free scratch      |
2362 * #-------------------#
2363 * | Ptr to (1)        |    <--- SP
2364 * #-------------------#
2365 */
2366    /*
2367     * Called to do a generic JNI down-call
2368     */
2369ENTRY art_quick_generic_jni_trampoline
2370    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
2371
2372    // Save SP, so we can have static CFI info.
2373    mov x28, sp
2374    .cfi_def_cfa_register x28
2375
2376    // This looks the same, but is different: this will be updated to point to the bottom
2377    // of the frame when the handle scope is inserted.
2378    mov xFP, sp
2379
2380    mov xIP0, #5120
2381    sub sp, sp, xIP0
2382
2383    // prepare for artQuickGenericJniTrampoline call
2384    // (Thread*,  SP)
2385    //    x0      x1   <= C calling convention
2386    //   xSELF    xFP  <= where they are
2387
2388    mov x0, xSELF   // Thread*
2389    mov x1, xFP
2390    bl artQuickGenericJniTrampoline  // (Thread*, sp)
2391
2392    // The C call will have registered the complete save-frame on success.
2393    // The result of the call is:
2394    // x0: pointer to native code, 0 on error.
2395    // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
2396
2397    // Check for error = 0.
2398    cbz x0, .Lexception_in_native
2399
2400    // Release part of the alloca.
2401    mov sp, x1
2402
2403    // Save the code pointer
2404    mov xIP0, x0
2405
2406    // Load parameters from frame into registers.
2407    // TODO Check with artQuickGenericJniTrampoline.
2408    //      Also, check again AAPCS64 - the stack arguments are interleaved.
2409    ldp x0, x1, [sp]
2410    ldp x2, x3, [sp, #16]
2411    ldp x4, x5, [sp, #32]
2412    ldp x6, x7, [sp, #48]
2413
2414    ldp d0, d1, [sp, #64]
2415    ldp d2, d3, [sp, #80]
2416    ldp d4, d5, [sp, #96]
2417    ldp d6, d7, [sp, #112]
2418
2419    add sp, sp, #128
2420
2421    blr xIP0        // native call.
2422
2423    // result sign extension is handled in C code
2424    // prepare for artQuickGenericJniEndTrampoline call
2425    // (Thread*, result, result_f)
2426    //    x0       x1       x2        <= C calling convention
2427    mov x1, x0      // Result (from saved).
2428    mov x0, xSELF   // Thread register.
2429    fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
2430
2431    bl artQuickGenericJniEndTrampoline
2432
2433    // Pending exceptions possible.
2434    ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
2435    cbnz x2, .Lexception_in_native
2436
2437    // Tear down the alloca.
2438    mov sp, x28
2439    .cfi_def_cfa_register sp
2440
2441    // Tear down the callee-save frame.
2442    RESTORE_SAVE_REFS_AND_ARGS_FRAME
2443
2444    // store into fpr, for when it's a fpr return...
2445    fmov d0, x0
2446    ret
2447
2448.Lexception_in_native:
2449    // Go through x1 because sp cannot be the destination of an ldr.
2450    ldr x1, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
2451    mov sp, x1
2452    .cfi_def_cfa_register sp
2453    // This will create a new save-all frame, required by the runtime.
2454    DELIVER_PENDING_EXCEPTION
2455END art_quick_generic_jni_trampoline
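
    /*
     * Call protocol above as illustrative C. artQuickGenericJniTrampoline
     * returns two values in x0/x1 (code pointer or 0, and the new stack
     * bottom); the struct below stands in for that two-register result.
     *
     *   struct { void* code; void* sp; } r =
     *       artQuickGenericJniTrampoline(self, frame);
     *   if (r.code == NULL) goto exception;  // handle scope setup failed
     *   uint64_t raw = CallNative(r.code);   // args are popped off the alloca
     *   uint64_t ret = artQuickGenericJniEndTrampoline(self, raw, fp_raw);
     *   if (self->exception != NULL) goto exception;
     */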
2456
2457/*
2458 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
2459 * of a quick call:
2460 * x0 = method being called/to bridge to.
2461 * x1..x7, d0..d7 = arguments to that method.
2462 */
2463ENTRY art_quick_to_interpreter_bridge
2464    SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
2465
2466    //  x0 will contain mirror::ArtMethod* method.
2467    mov x1, xSELF                          // Pass Thread::Current (held in xSELF).
2468    mov x2, sp
2469
2470    // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
2471    //                                      mirror::ArtMethod** sp)
2472    bl   artQuickToInterpreterBridge
2473
2474    RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
2475
2476    fmov d0, x0
2477
2478    RETURN_OR_DELIVER_PENDING_EXCEPTION
2479END art_quick_to_interpreter_bridge
2480
2481
2482//
2483// Instrumentation-related stubs
2484//
2485    .extern artInstrumentationMethodEntryFromCode
2486ENTRY art_quick_instrumentation_entry
2487    SETUP_SAVE_REFS_AND_ARGS_FRAME
2488
2489    mov   x20, x0             // Preserve method reference in a callee-save.
2490
2491    mov   x2, xSELF
2492    mov   x3, xLR
2493    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, LR)
2494
2495    mov   xIP0, x0            // x0 = result of call.
2496    mov   x0, x20             // Reload method reference.
2497
2498    RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
2499    adr   xLR, art_quick_instrumentation_exit
2500    br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
2501END art_quick_instrumentation_entry
2502
2503    .extern artInstrumentationMethodExitFromCode
2504ENTRY art_quick_instrumentation_exit
2505    mov   xLR, #0             // Clobber LR for later checks.
2506
2507    SETUP_SAVE_REFS_ONLY_FRAME
2508
2509    // We need to save x0 and d0. We could use a callee-save from SETUP_SAVE_REFS_ONLY_FRAME, but then
2510    // we would need to fully restore it. As there are a lot of callee-save registers, it seems
2511    // easier to have an extra small stack area.
2512
2513    str x0, [sp, #-16]!       // Save integer result.
2514    .cfi_adjust_cfa_offset 16
2515    str d0,  [sp, #8]         // Save floating-point result.
2516
2517    add   x1, sp, #16         // Pass SP.
2518    mov   x2, x0              // Pass integer result.
2519    fmov  x3, d0              // Pass floating-point result.
2520    mov   x0, xSELF           // Pass Thread.
2521    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
2522
2523    mov   xIP0, x0            // Return address from instrumentation call.
2524    mov   xLR, x1             // x1 holds the link register if we're to bounce to deoptimize.
2525
2526    ldr   d0, [sp, #8]        // Restore floating-point result.
2527    ldr   x0, [sp], 16        // Restore integer result, and drop stack area.
2528    .cfi_adjust_cfa_offset -16
2529
2530    POP_SAVE_REFS_ONLY_FRAME
2531
2532    br    xIP0                // Tail-call out.
2533END art_quick_instrumentation_exit
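
    /*
     * Return protocol above as illustrative C: the runtime call hands back two
     * values in x0/x1 (an AAPCS64 two-register return), the address to branch
     * to and the link register to install so deoptimization can bounce
     * through lr.
     *
     *   struct { uintptr_t target; uintptr_t lr; } r =
     *       artInstrumentationMethodExitFromCode(self, sp, gpr_res, fpr_res);
     *   // restore the saved results, then: lr = r.lr; br r.target;
     */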
2534
2535    /*
2536     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
2537     * will long jump to the upcall with a special exception of -1.
2538     */
2539    .extern artDeoptimize
2540ENTRY art_quick_deoptimize
2541    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
2542    mov    x0, xSELF          // Pass thread.
2543    bl     artDeoptimize      // artDeoptimize(Thread*)
2544    brk 0
2545END art_quick_deoptimize
2546
2547    /*
2548     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
2549     * will long jump to the upcall with a special exception of -1.
2550     */
2551    .extern artDeoptimizeFromCompiledCode
2552ENTRY art_quick_deoptimize_from_compiled_code
2553    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
2554    mov    x0, xSELF                      // Pass thread.
2555    bl     artDeoptimizeFromCompiledCode  // artDeoptimizeFromCompiledCode(Thread*)
2556    brk 0
2557END art_quick_deoptimize_from_compiled_code
2558
2559
2560    /*
2561     * String's indexOf.
2562     *
2563     * TODO: Not very optimized.
2564     * On entry:
2565     *    x0:   string object (known non-null)
2566     *    w1:   char to match (known <= 0xFFFF)
2567     *    w2:   Starting offset in string data
2568     */
2569ENTRY art_quick_indexof
2570    ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
2571    add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
2572
2573    /* Clamp start to [0..count] */
2574    cmp   w2, #0
2575    csel  w2, wzr, w2, lt
2576    cmp   w2, w3
2577    csel  w2, w3, w2, gt
2578
2579    /* Save a copy to compute result */
2580    mov   x5, x0
2581
2582    /* Build pointer to start of data to compare and pre-bias */
2583    add   x0, x0, x2, lsl #1
2584    sub   x0, x0, #2
2585
2586    /* Compute iteration count */
2587    sub   w2, w3, w2
2588
2589    /*
2590     * At this point we have:
2591     *  x0: start of the data to test
2592     *  w1: char to compare
2593     *  w2: iteration count
2594     *  x5: original start of string data
2595     */
2596
2597    subs  w2, w2, #4
2598    b.lt  .Lindexof_remainder
2599
2600.Lindexof_loop4:
2601    ldrh  w6, [x0, #2]!
2602    ldrh  w7, [x0, #2]!
2603    ldrh  wIP0, [x0, #2]!
2604    ldrh  wIP1, [x0, #2]!
2605    cmp   w6, w1
2606    b.eq  .Lmatch_0
2607    cmp   w7, w1
2608    b.eq  .Lmatch_1
2609    cmp   wIP0, w1
2610    b.eq  .Lmatch_2
2611    cmp   wIP1, w1
2612    b.eq  .Lmatch_3
2613    subs  w2, w2, #4
2614    b.ge  .Lindexof_loop4
2615
2616.Lindexof_remainder:
2617    adds  w2, w2, #4
2618    b.eq  .Lindexof_nomatch
2619
2620.Lindexof_loop1:
2621    ldrh  w6, [x0, #2]!
2622    cmp   w6, w1
2623    b.eq  .Lmatch_3
2624    subs  w2, w2, #1
2625    b.ne  .Lindexof_loop1
2626
2627.Lindexof_nomatch:
2628    mov   x0, #-1
2629    ret
2630
2631.Lmatch_0:
2632    sub   x0, x0, #6
2633    sub   x0, x0, x5
2634    asr   x0, x0, #1
2635    ret
2636.Lmatch_1:
2637    sub   x0, x0, #4
2638    sub   x0, x0, x5
2639    asr   x0, x0, #1
2640    ret
2641.Lmatch_2:
2642    sub   x0, x0, #2
2643    sub   x0, x0, x5
2644    asr   x0, x0, #1
2645    ret
2646.Lmatch_3:
2647    sub   x0, x0, x5
2648    asr   x0, x0, #1
2649    ret
2650END art_quick_indexof
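
    /*
     * Reference C for the stub above (given <stdint.h>); the assembly unrolls
     * the scan loop by four. String data is assumed to be uint16_t code units.
     *
     *   int32_t IndexOf(const uint16_t* chars, int32_t count, uint16_t ch,
     *                   int32_t start) {
     *     if (start < 0) start = 0;
     *     if (start > count) start = count;
     *     for (int32_t i = start; i < count; i++) {
     *       if (chars[i] == ch) return i;
     *     }
     *     return -1;
     *   }
     */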
2651
2652    /*
2653     * Create a function `name` calling the ReadBarrier::Mark routine,
2654     * getting its argument and returning its result through W register
2655     * `wreg` (corresponding to X register `xreg`), saving and restoring
2656     * all caller-save registers.
2657     *
2658     * If `wreg` is different from `w0`, the generated function follows a
2659     * non-standard runtime calling convention:
2660     * - register `wreg` is used to pass the (sole) argument of this
2661     *   function (instead of W0);
2662     * - register `wreg` is used to return the result of this function
2663     *   (instead of W0);
2664     * - W0 is treated like a normal (non-argument) caller-save register;
2665     * - everything else is the same as in the standard runtime calling
2666     *   convention (e.g. standard callee-save registers are preserved).
2667     */
2668.macro READ_BARRIER_MARK_REG name, wreg, xreg
2669ENTRY \name
2670    // Reference is null, no work to do at all.
2671    cbz \wreg, .Lret_rb_\name
2672    // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
2673    ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
2674    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
2675    ret
2676.Lslow_path_rb_\name:
2677    /*
2678     * Allocate 46 stack slots * 8 = 368 bytes:
2679     * - 20 slots for core registers X0-X19
2680     * - 24 slots for floating-point registers D0-D7 and D16-D31
2681     * -  1 slot for return address register XLR
2682     * -  1 padding slot for 16-byte stack alignment
2683     */
2684    // Save all potentially live caller-save core registers.
2685    stp   x0, x1,   [sp, #-368]!
2686    .cfi_adjust_cfa_offset 368
2687    .cfi_rel_offset x0, 0
2688    .cfi_rel_offset x1, 8
2689    stp   x2, x3,   [sp, #16]
2690    .cfi_rel_offset x2, 16
2691    .cfi_rel_offset x3, 24
2692    stp   x4, x5,   [sp, #32]
2693    .cfi_rel_offset x4, 32
2694    .cfi_rel_offset x5, 40
2695    stp   x6, x7,   [sp, #48]
2696    .cfi_rel_offset x6, 48
2697    .cfi_rel_offset x7, 56
2698    stp   x8, x9,   [sp, #64]
2699    .cfi_rel_offset x8, 64
2700    .cfi_rel_offset x9, 72
2701    stp   x10, x11, [sp, #80]
2702    .cfi_rel_offset x10, 80
2703    .cfi_rel_offset x11, 88
2704    stp   x12, x13, [sp, #96]
2705    .cfi_rel_offset x12, 96
2706    .cfi_rel_offset x13, 104
2707    stp   x14, x15, [sp, #112]
2708    .cfi_rel_offset x14, 112
2709    .cfi_rel_offset x15, 120
2710    stp   x16, x17, [sp, #128]
2711    .cfi_rel_offset x16, 128
2712    .cfi_rel_offset x17, 136
2713    stp   x18, x19, [sp, #144]
2714    .cfi_rel_offset x18, 144
2715    .cfi_rel_offset x19, 152
2716    // Save all potentially live caller-save floating-point registers.
2717    stp   d0, d1,   [sp, #160]
2718    stp   d2, d3,   [sp, #176]
2719    stp   d4, d5,   [sp, #192]
2720    stp   d6, d7,   [sp, #208]
2721    stp   d16, d17, [sp, #224]
2722    stp   d18, d19, [sp, #240]
2723    stp   d20, d21, [sp, #256]
2724    stp   d22, d23, [sp, #272]
2725    stp   d24, d25, [sp, #288]
2726    stp   d26, d27, [sp, #304]
2727    stp   d28, d29, [sp, #320]
2728    stp   d30, d31, [sp, #336]
2729    // Save return address.
2730    str   xLR,      [sp, #352]
2731    .cfi_rel_offset x30, 352
2732    // (sp + #360 is a padding slot)
2733
2734    .ifnc \wreg, w0
2735      mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
2736    .endif
2737    bl    artReadBarrierMark            // artReadBarrierMark(obj)
2738    .ifnc \wreg, w0
2739      mov   \wreg, w0                   // Return result into `wreg`
2740    .endif
2741
2742    // Restore core regs, except `xreg`, as `wreg` is used to return the
2743    // result of this function (simply remove it from the stack instead).
2744    POP_REGS_NE x0, x1,   0,   \xreg
2745    POP_REGS_NE x2, x3,   16,  \xreg
2746    POP_REGS_NE x4, x5,   32,  \xreg
2747    POP_REGS_NE x6, x7,   48,  \xreg
2748    POP_REGS_NE x8, x9,   64,  \xreg
2749    POP_REGS_NE x10, x11, 80,  \xreg
2750    POP_REGS_NE x12, x13, 96,  \xreg
2751    POP_REGS_NE x14, x15, 112, \xreg
2752    POP_REGS_NE x16, x17, 128, \xreg
2753    POP_REGS_NE x18, x19, 144, \xreg
2754    // Restore floating-point registers.
2755    ldp   d0, d1,   [sp, #160]
2756    ldp   d2, d3,   [sp, #176]
2757    ldp   d4, d5,   [sp, #192]
2758    ldp   d6, d7,   [sp, #208]
2759    ldp   d16, d17, [sp, #224]
2760    ldp   d18, d19, [sp, #240]
2761    ldp   d20, d21, [sp, #256]
2762    ldp   d22, d23, [sp, #272]
2763    ldp   d24, d25, [sp, #288]
2764    ldp   d26, d27, [sp, #304]
2765    ldp   d28, d29, [sp, #320]
2766    ldp   d30, d31, [sp, #336]
2767    // Restore return address and remove padding.
2768    ldr   xLR,      [sp, #352]
2769    .cfi_restore x30
2770    add sp, sp, #368
2771    .cfi_adjust_cfa_offset -368
2772.Lret_rb_\name:
2773    ret
2774END \name
2775.endm
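
    /*
     * Each generated stub, as illustrative C; the argument and result live in
     * wreg rather than w0 when wreg != w0.
     *
     *   Object* Mark(Object* ref) {
     *     if (ref == NULL) return NULL;                      // fast: null
     *     if (ref->lock_word & (1u << LOCK_WORD_MARK_BIT_SHIFT))
     *       return ref;                                      // fast: marked
     *     return artReadBarrierMark(ref);  // slow path saves caller-saves
     *   }
     */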
2776
2777READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
2778READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
2779READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
2780READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
2781READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
2782READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
2783READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
2784READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
2785READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
2786READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
2787READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
2788READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
2789READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
2790READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
2791READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
2792READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
2793// READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 (ip0 is blocked)
2794READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
2795READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
2796READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
2797READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
2798READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
2799READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
2800READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
2801READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
2802READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
2803READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
2804READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
2805READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
2806READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
2807