CompilerTemplateAsm-armv7-a.S revision 13fbc2e4bfa04cce8e181ac37d7f2b13a54aa037
1/*
2 * This file was generated automatically by gen-template.py for 'armv7-a'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: armv5te/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * ARMv5 definitions and declarations.
28 */
29
30/*
31ARM EABI general notes:
32
33r0-r3 hold first 4 args to a method; they are not preserved across method calls
34r4-r8 are available for general use
35r9 is given special treatment in some situations, but not for us
36r10 (sl) seems to be generally available
37r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
38r12 (ip) is scratch -- not preserved across method calls
39r13 (sp) should be managed carefully in case a signal arrives
40r14 (lr) must be preserved
41r15 (pc) can be tinkered with directly
42
43r0 holds returns of <= 4 bytes
44r0-r1 hold returns of 8 bytes, low word in r0
45
46Callee must save/restore r4+ (except r12) if it modifies them.
47
48Stack is "full descending".  Only the arguments that don't fit in the first 4
49registers are placed on the stack.  "sp" points at the first stacked argument
50(i.e. the 5th arg).
51
52VFP: single-precision results in s0, double-precision results in d0.
53
54In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
5564-bit quantities (long long, double) must be 64-bit aligned.
56*/
57
58/*
59JIT and ARM notes:
60
61The following registers have fixed assignments:
62
63  reg nick      purpose
64  r5  rFP       interpreted frame pointer, used for accessing locals and args
65  r6  rGLUE     MterpGlue pointer
66
67The following registers have fixed assignments in mterp but are scratch
68registers in compiled code
69
70  reg nick      purpose
71  r4  rPC       interpreted program counter, used for fetching instructions
72  r7  rINST     first 16-bit code unit of current instruction
73  r8  rIBASE    interpreted instruction base pointer, used for computed goto
74
75Macros are provided for common operations.  Each macro MUST emit only
76one instruction to make instruction-counting easier.  They MUST NOT alter
77unspecified registers or condition codes.
78*/
79
/*
 * Single-purpose registers, given names for clarity.  rFP and rGLUE keep
 * these roles in compiled code; rPC, rINST and rIBASE are fixed only in
 * mterp and are treated as scratch registers by the templates.
 */
#define rPC     r4
#define rFP     r5
#define rGLUE   r6
#define rINST   r7
#define rIBASE  r8

/*
 * Given a frame pointer, find the stack save area.
 *
 * In C this is "((StackSaveArea*)(_fp) -1)".
 *
 * Expands to exactly one instruction:
 *     _reg <- _fpreg - sizeofStackSaveArea
 * No other register and no condition flag is modified.
 */
#define SAVEAREA_FROM_FP(_reg, _fpreg) \
    sub     _reg, _fpreg, #sizeofStackSaveArea

/*
 * Store rPC into the currentPc field of the save area located just below
 * the frame that rFP points at.  Expands to exactly one "str".
 */
#define EXPORT_PC() \
    str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]

/*
 * This is a #include, not a %include, because we want the C pre-processor
 * to expand the macros into assembler assignment statements.
 */
#include "../../../mterp/common/asm-constants.h"

/* File: armv5te-vfp/platform.S */
/*
 * ===========================================================================
 *  CPU-version-specific defines and utility
 * ===========================================================================
 */

/*
 * dvmCompilerTemplateStart labels the beginning of the template code in
 * .text.  The templates that follow are each introduced by ".balign 4" and
 * a global dvmCompiler_TEMPLATE_* label.
 */
    .global dvmCompilerTemplateStart
    .type   dvmCompilerTemplateStart, %function
    .text

dvmCompilerTemplateStart:

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMP_LONG
dvmCompiler_TEMPLATE_CMP_LONG:
/* File: armv5te/TEMPLATE_CMP_LONG.S */
    /*
     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
     * register based on the results of the comparison.
     *
     * On entry:
     *     r0/r1 = vBB (low word in r0, high word in r1)
     *     r2/r3 = vCC (low word in r2, high word in r3)
     * On exit:
     *     r0 = 0 if vBB == vCC, 1 if vBB > vCC, -1 if vBB < vCC
     * Only r0 and the condition flags are written; returns with "bx lr".
     *
     * The operands are already in registers, so this template issues no
     * loads.  In practice many comparisons are resolved by looking at the
     * high words alone, which is the first test made below.
     *
     * If we just wanted to set condition flags, we could do this:
     *  subs    ip, r0, r2
     *  sbcs    ip, r1, r3
     *  subeqs  ip, r0, r2
     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
     * integer value, which we can do with 2 conditional mov/mvn instructions
     * (set 1, set -1; if they're equal we already have 0 in ip), giving
     * us a constant 5-cycle path plus a branch at the end to the
     * instruction epilogue code.  The multi-compare approach below needs
     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
     * in the worst case (the 64-bit values are equal).
     */
    /* cmp-long vAA, vBB, vCC */
    cmp     r1, r3                      @ compare high words (vBB+1, vCC+1)
    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
    bgt     .LTEMPLATE_CMP_LONG_greater
    subs    r0, r0, r2                  @ high words equal: r0<- r0 - r2
    bxeq     lr                         @ low words equal too: return r0 == 0
    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
    @ otherwise fall through: low word of vBB is below low word of vCC
.LTEMPLATE_CMP_LONG_less:
    mvn     r0, #0                      @ r0<- -1
    bx      lr
.LTEMPLATE_CMP_LONG_greater:
    mov     r0, #1                      @ r0<- 1
    bx      lr

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_RETURN
dvmCompiler_TEMPLATE_RETURN:
/* File: armv5te/TEMPLATE_RETURN.S */
    /*
     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
     * If the stored value in returnAddr is non-zero, the caller is compiled
     * by the JIT thus return to the address in the code cache following the
     * invoke instruction.  Otherwise continue at the entry point loaded from
     * .LdvmJitToInterpNoChainNoProfile.
     *
     * If the frame being returned to has a null method (a break frame),
     * rPC/rFP are stored to the glue and the routine loaded from
     * .LdvmMterpStdBail is called instead (under WITH_SELF_VERIFICATION the
     * template branches to lr in that case).
     *
     * Register use:
     *     r0  = old save area, later method->clazz->pDvmDex
     *     r1  = address loaded from .LdvmJitToInterpNoChainNoProfile
     *     r2  = method being returned to
     *     r3  = glue->self
     *     r8  = &suspendCount, then suspendCount
     *     r9  = saveArea->returnAddr, or 0 when chaining is disabled
     *     r10 = saveArea->prevFrame, later method->clazz
     */
#if defined(WITH_INLINE_PROFILING)
    stmfd   sp!, {r0-r2,lr}             @ preserve live registers
    mov     r0, r6
    @ r0=rGlue
    mov     lr, pc                      @ set up the return address, then
    ldr     pc, .LdvmFastJavaMethodTraceExit @ call through the literal
    ldmfd   sp!, {r0-r2,lr}             @ restore live registers
#endif
    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
#else
    mov     r9, #0                      @ disable chaining
#endif
    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
                                        @ r2<- method being returned to
    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
    cmp     r2, #0                      @ break frame?
#if !defined(WITH_SELF_VERIFICATION)
    beq     1f                          @ bail to interpreter
#else
    blxeq   lr                          @ punt to interpreter and compare state
#endif
    ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
    mov     rFP, r10                    @ publish new FP
    ldr     r10, [r2, #offMethod_clazz] @ r10<- method->clazz
    ldr     r8, [r8]                    @ r8<- suspendCount

    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
    str     r0, [rGLUE, #offGlue_methodClassDex]
    cmp     r8, #0                      @ check the suspendCount
    movne   r9, #0                      @ suspend pending: drop the chaining cell
    str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
    cmp     r9, #0                      @ chaining cell exists?
    blxne   r9                          @ jump to the chaining cell
#if defined(WITH_JIT_TUNING)
    mov     r0, #kCallsiteInterpreted
#endif
    mov     pc, r1                      @ callsite is interpreted
1:
    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
    mov     r1, #0                      @ changeInterp = false
    mov     r0, rGLUE                   @ Expecting rGLUE in r0
    blx     r2                          @ exit the interpreter

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    /*
     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
     * into rPC then jump to the entry point loaded from
     * .LdvmJitToInterpTraceSelectNoChain to dispatch the runtime-resolved
     * callee.
     *
     * On entry:
     *     r0  = methodToCall
     *     r1  = returnCell
     *     rPC = dalvikCallsite
     *
     * Early exits through "bx lr":
     *     - the new frame bottom would fall below glue->interpStackEnd
     *     - suspendCount is non-zero
     *     - (WITH_SELF_VERIFICATION only) the callee is native
     * Without WITH_SELF_VERIFICATION a native callee goes to .LinvokeNative.
     */
    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    add     r3, r1, #1  @ Thumb addr is odd
    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    ldr     r8, [r8]                    @ r8<- suspendCount (int)
    cmp     r10, r9                     @ bottom < interpStackEnd?
    bxlo    lr                          @ return to raise stack overflow excep.
    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns


    @ set up newSaveArea
    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    cmp     r8, #0                      @ suspendCount != 0
    bxne    lr                          @ bail to the interpreter
    tst     r10, #ACC_NATIVE
#if !defined(WITH_SELF_VERIFICATION)
    bne     .LinvokeNative
#else
    bxne    lr                          @ bail to the interpreter
#endif

    ldr     r10, .LdvmJitToInterpTraceSelectNoChain
    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self

    @ Update "glue" values for the new method
    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
    mov     rFP, r1                         @ fp = newFp
    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
#if defined(WITH_INLINE_PROFILING)
    stmfd   sp!, {r0-r3}                    @ preserve r0-r3
    mov     r1, r6
    @ r0=methodToCall, r1=rGlue
    mov     lr, pc                          @ set up the return address, then
    ldr     pc, .LdvmFastMethodTraceEnter   @ call through the literal
    ldmfd   sp!, {r0-r3}                    @ restore r0-r3
#endif

    @ Start executing the callee
#if defined(WITH_JIT_TUNING)
    mov     r0, #kInlineCacheMiss
#endif
    mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    /*
     * For monomorphic callsite, setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     *
     * On entry:
     *     r0  = methodToCall (guaranteed to be non-native)
     *     r1  = returnCell
     *     r2  = methodToCall->outsSize
     *     r7  = methodToCall->registersSize
     *     rPC = dalvikCallsite
     *
     * Exits:
     *     bx lr   - frame is set up; continue at the callee-chaining cell
     *     bx r12  - r12 = lr + 2; taken when the new frame bottom would fall
     *               below glue->interpStackEnd or suspendCount is non-zero
     *
     * .LinvokeChain is also the branch target used by the predicted-chain
     * template once its class check succeeds.
     */
    @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
    @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
    @ methodToCall is guaranteed to be non-native
.LinvokeChain:
    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    add     r3, r1, #1  @ Thumb addr is odd
    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    add     r12, lr, #2                 @ setup the punt-to-interp address
    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    ldr     r8, [r8]                    @ r8<- suspendCount (int)
    cmp     r10, r9                     @ bottom < interpStackEnd?
    bxlo    r12                         @ return to raise stack overflow excep.
    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]

    @ set up newSaveArea
    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    cmp     r8, #0                      @ suspendCount != 0
    bxne    r12                         @ bail to the interpreter

    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self

    @ Update "glue" values for the new method
    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
    mov     rFP, r1                         @ fp = newFp
    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
#if defined(WITH_INLINE_PROFILING)
    stmfd   sp!, {r0-r2,lr}             @ preserve clobbered live registers
    mov     r1, r6
    @ r0=methodToCall, r1=rGlue
    mov     lr, pc                      @ set up the return address, then
    ldr     pc, .LdvmFastMethodTraceEnter @ call through the literal
    ldmfd   sp!, {r0-r2,lr}             @ restore registers
#endif

    bx      lr                              @ return to the callee-chaining cell

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    /*
     * For polymorphic callsite, check whether the cached class pointer matches
     * the current one. If so setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     *
     * The predicted chaining cell is declared in ArmLIR.h with the
     * following layout:
     *
     *  typedef struct PredictedChainingCell {
     *      u4 branch;
     *      const ClassObject *clazz;
     *      const Method *method;
     *      u4 counter;
     *  } PredictedChainingCell;
     *
     * Upon returning to the callsite:
     *    - lr  : to branch to the chaining cell
     *    - lr+2: to punt to the interpreter
     *    - lr+4: to fully resolve the callee and may rechain.
     *            r3 <- class
     *            r9 <- counter
     *
     * On a prediction hit this template loads the callee's registersSize
     * and outsSize and branches to .LinvokeChain, which produces the lr and
     * lr+2 exits.  On a miss it returns to lr+4 itself with the registers
     * listed just above the final "bx lr".
     */
    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
    ldr     r9, [rGLUE, #offGlue_icRechainCount]   @ r9 <- shared rechainCount
    cmp     r3, r8          @ predicted class == actual class?
#if defined(WITH_JIT_TUNING)
    @ count prediction hits; the flags from the cmp above stay live throughout
    ldr     r7, .LdvmICHitCount
#if defined(WORKAROUND_CORTEX_A9_745320)
    /* Don't use conditional loads if the HW defect exists */
    bne     101f
    ldr     r10, [r7, #0]
101:
#else
    ldreq   r10, [r7, #0]
#endif
    add     r10, r10, #1    @ unconditional, but only stored back on a hit
    streq   r10, [r7, #0]
#endif
    ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    beq     .LinvokeChain   @ predicted chain is valid
    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
    cmp     r8, #0          @ is predictedChainCell->clazz null?
    moveq   r1, #0          @ yes: r1 <- 0
    subne   r1, r9, #1      @ no: r1 <- rechainCount - 1
    strne   r1, [rGLUE, #offGlue_icRechainCount]   @ write back to InterpState
    add     lr, lr, #4      @ return to fully-resolve landing pad
    /*
     * r1 <- count
     * r2 <- &predictedChainCell
     * r3 <- this->class
     * r4 <- dPC
     * r7 <- this->class->vtable
     */
    bx      lr

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    /*
     * Set up a Dalvik frame for a native method, call method->nativeFunc
     * and unwind again.
     *
     * On entry:
     *     r0  = methodToCall
     *     r1  = returnCell
     *     r7  = methodToCall->registersSize
     *     rPC = dalvikCallsite
     *
     * Early exits through "bx lr":
     *     - the new save area would fall below glue->interpStackEnd
     *     - suspendCount is non-zero
     *     - always, when built WITH_SELF_VERIFICATION
     *
     * The native function is called with r0 = newFP, r1 = &glue->retval,
     * r2 = methodToCall and r3 = glue->self.  r9 (self) and r10 (new save
     * area) are read again after the call returns.
     *
     * After the call: a pending exception goes to .LhandleException with
     * r0 = dalvikCallsitePC; otherwise control continues at the saved
     * returnAddr if it is non-zero, else at the entry point loaded from
     * .LdvmJitToInterpTraceSelectNoChain with rPC advanced by 6 bytes.
     */
    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    @ r7 = methodToCall->registersSize
    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    add     r3, r1, #1  @ Thumb addr is odd
    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    ldr     r8, [r8]                    @ r8<- suspendCount (int)
    cmp     r10, r9                     @ bottom < interpStackEnd?
    bxlo    lr                          @ return to raise stack overflow excep.
    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]

    @ set up newSaveArea
    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    cmp     r8, #0                      @ suspendCount != 0
    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
#if !defined(WITH_SELF_VERIFICATION)
    bxne    lr                          @ bail to the interpreter
#else
    bx      lr                          @ bail to interpreter unconditionally
#endif

    @ go ahead and transfer control to the native code
    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
    mov     r2, #0
    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
    str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
                                        @ newFp->localRefCookie=top
    mov     r9, r3                      @ r9<- glue->self (preserve)
    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area

    mov     r2, r0                      @ r2<- methodToCall
    mov     r0, r1                      @ r0<- newFP
    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
#if defined(WITH_INLINE_PROFILING)
    @ r2=methodToCall, r6=rGLUE
    stmfd   sp!, {r2,r6}                @ to be consumed after JNI return
    stmfd   sp!, {r0-r3}                @ preserve r0-r3
    mov     r0, r2
    mov     r1, r6
    @ r0=JNIMethod, r1=rGlue
    mov     lr, pc                      @ set up the return address, then
    ldr     pc, .LdvmFastMethodTraceEnter @ call through the literal
    ldmfd   sp!, {r0-r3}                @ restore r0-r3
#endif

    blx     r8                          @ off to the native code

#if defined(WITH_INLINE_PROFILING)
    ldmfd   sp!, {r0-r1}                @ pop the r2/r6 pair saved above
    @ r0=JNIMethod, r1=rGlue
    mov     lr, pc                      @ set up the return address, then
    ldr     pc, .LdvmFastNativeMethodTraceExit @ call through the literal
#endif
    @ native return; r9=self, r10=newSaveArea
    @ equivalent to dvmPopJniLocals
    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
    ldr     r1, [r9, #offThread_exception] @ check for exception
    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
    cmp     r1, #0                      @ null?
    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
    ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]

    @ r0 = dalvikCallsitePC
    bne     .LhandleException           @ exception is non-null: handle it

    str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
    cmp     r2, #0                      @ return chaining cell still exists?
    bxne    r2                          @ yes - go ahead

    @ continue executing the next instruction through the interpreter
    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
#if defined(WITH_JIT_TUNING)
    mov     r0, #kCallsiteInterpreted
#endif
    mov     pc, r1

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_LONG
dvmCompiler_TEMPLATE_MUL_LONG:
/* File: armv5te/TEMPLATE_MUL_LONG.S */
    /*
     * Signed 64-bit integer multiply.
     *
     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
     * Also overwrites r2, r9, r10 and ip.
     *
     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
     *        WX
     *      x YZ
     *  --------
     *     ZW ZX
     *  YW YX
     *
     * The low word of the result holds ZX, the high word holds
     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
     * it doesn't fit in the low 64 bits.
     *
     * Unlike most ARM math operations, multiply instructions have
     * restrictions on using the same register more than once (Rd and Rm
     * cannot be the same).
     */
    /* mul-long vAA, vBB, vCC */
    mul     ip, r2, r1                  @  ip<- ZxW
    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
    mov     r0,r9                       @  r0<- low word of the result
    mov     r1,r10                      @  r1<- high word of the result
    bx      lr

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHL_LONG
dvmCompiler_TEMPLATE_SHL_LONG:
/* File: armv5te/TEMPLATE_SHL_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * On entry:  r0/r1 = value (low word in r0), r2 = shift distance
     * On exit:   r0/r1 = value << (r2 & 63)
     * Also overwrites r2, r3, ip and the condition flags.
     */
    /* shl-long vAA, vBB, vCC */
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r1, r1, asl r2              @  r1<- r1 << r2
    rsb     r3, r2, #32                 @  r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @  ip<- r2 - 32
    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
    mov     r0, r0, asl r2              @  r0<- r0 << r2
    bx      lr

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHR_LONG
dvmCompiler_TEMPLATE_SHR_LONG:
/* File: armv5te/TEMPLATE_SHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * On entry:  r0/r1 = value (low word in r0), r2 = shift distance
     * On exit:   r0/r1 = value >> (r2 & 63), using an arithmetic (asr)
     *            shift on the high word
     * Also overwrites r2, r3, ip and the condition flags.
     */
    /* shr-long vAA, vBB, vCC */
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
    rsb     r3, r2, #32                 @  r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @  ip<- r2 - 32
    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
    mov     r1, r1, asr r2              @  r1<- r1 >> r2
    bx      lr

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_USHR_LONG
dvmCompiler_TEMPLATE_USHR_LONG:
/* File: armv5te/TEMPLATE_USHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * On entry:  r0/r1 = value (low word in r0), r2 = shift distance
     * On exit:   r0/r1 = value >>> (r2 & 63), using a logical (lsr) shift
     *            on the high word
     * Also overwrites r2, r3, ip and the condition flags.
     */
    /* ushr-long vAA, vBB, vCC */
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
    rsb     r3, r2, #32                 @  r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @  ip<- r2 - 32
    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
    bx      lr

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Generic 32-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs s2 = s0 op s1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; the result is stored through r0.
     */
    flds    s0,[r1]                     @ s0<- op1
    flds    s1,[r2]                     @ s1<- op2
    fadds   s2, s0, s1                  @ s2<- s0 + s1
    fsts    s2,[r0]                     @ target<- s2
    bx      lr


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Generic 32-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs s2 = s0 op s1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; the result is stored through r0.
     */
    flds    s0,[r1]                     @ s0<- op1
    flds    s1,[r2]                     @ s1<- op2
    fsubs   s2, s0, s1                  @ s2<- s0 - s1
    fsts    s2,[r0]                     @ target<- s2
    bx      lr


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Generic 32-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs s2 = s0 op s1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; the result is stored through r0.
     */
    flds    s0,[r1]                     @ s0<- op1
    flds    s1,[r2]                     @ s1<- op2
    fmuls   s2, s0, s1                  @ s2<- s0 * s1
    fsts    s2,[r0]                     @ target<- s2
    bx      lr


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Generic 32-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs s2 = s0 op s1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; the result is stored through r0.
     */
    flds    s0,[r1]                     @ s0<- op1
    flds    s1,[r2]                     @ s1<- op2
    fdivs   s2, s0, s1                  @ s2<- s0 / s1
    fsts    s2,[r0]                     @ target<- s2
    bx      lr


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Generic 64-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs d2 = d0 op d1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; the result is stored through r0.
     */
    fldd    d0,[r1]                     @ d0<- op1
    fldd    d1,[r2]                     @ d1<- op2
    faddd   d2, d0, d1                  @ d2<- d0 + d1
    fstd    d2,[r0]                     @ target<- d2
    bx      lr


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Generic 64-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs d2 = d0 op d1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; the result is stored through r0.
     */
    fldd    d0,[r1]                     @ d0<- op1
    fldd    d1,[r2]                     @ d1<- op2
    fsubd   d2, d0, d1                  @ d2<- d0 - d1
    fstd    d2,[r0]                     @ target<- d2
    bx      lr


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Generic 64-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs d2 = d0 op d1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; the result is stored through r0.
     */
    fldd    d0,[r1]                     @ d0<- op1
    fldd    d1,[r2]                     @ d1<- op2
    fmuld   d2, d0, d1                  @ d2<- d0 * d1
    fstd    d2,[r0]                     @ target<- d2
    bx      lr


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Generic 64-bit floating point operation.  Provide an "instr" line that
     * specifies an instruction that performs d2 = d0 op d1.
     *
     * On entry:
     *     r0 = target dalvik register address
     *     r1 = op1 address
     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; the result is stored through r0.
     */
    fldd    d0,[r1]                     @ d0<- op1
    fldd    d1,[r2]                     @ d1<- op2
    fdivd   d2, d0, d1                  @ d2<- d0 / d1
    fstd    d2,[r0]                     @ target<- d2
    bx      lr


774/* ------------------------------ */
775    .balign 4
776    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
777dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
778/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
779/* File: armv5te-vfp/funopNarrower.S */
780    /*
781     * double-to-float: loads the 64-bit source, converts it with fcvtsd and
782     * stores the 32-bit result.  Expansion of the generic narrowing "s0 = op d0".
783     *
784     * For: double-to-int, double-to-float (this expansion is double-to-float)
785     *
786     * On entry:
787     *     r0 = target dalvik register address
788     *     r1 = src dalvik register address
789     */
790    /* unop vA, vB */
791    fldd    d0, [r1]                    @ d0<- vB
792    fcvtsd  s0, d0                              @ s0<- op d0
793    fsts    s0, [r0]                    @ vA<- s0
794    bx      lr
795
796
797/* ------------------------------ */
798    .balign 4
799    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
800dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
801/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
802/* File: armv5te-vfp/funopNarrower.S */
803    /*
804     * double-to-int: loads the 64-bit source, converts it with ftosizd and
805     * stores the 32-bit result.  Expansion of the generic narrowing "s0 = op d0".
806     *
807     * For: double-to-int, double-to-float (this expansion is double-to-int)
808     *
809     * On entry:
810     *     r0 = target dalvik register address
811     *     r1 = src dalvik register address
812     */
813    /* unop vA, vB */
814    fldd    d0, [r1]                    @ d0<- vB
815    ftosizd  s0, d0                              @ s0<- op d0
816    fsts    s0, [r0]                    @ vA<- s0
817    bx      lr
818
819
820/* ------------------------------ */
821    .balign 4
822    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
823dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
824/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
825/* File: armv5te-vfp/funopWider.S */
826    /*
827     * float-to-double: loads the 32-bit source, converts it with fcvtds and
828     * stores the 64-bit result.  Expansion of the generic widening "d0 = op s0".
829     *
830     * For: int-to-double, float-to-double (this expansion is float-to-double)
831     *
832     * On entry:
833     *     r0 = target dalvik register address
834     *     r1 = src dalvik register address
835     */
836    /* unop vA, vB */
837    flds    s0, [r1]                    @ s0<- vB
838    fcvtds  d0, s0                              @ d0<- op s0
839    fstd    d0, [r0]                    @ vA<- d0
840    bx      lr
841
842
843/* ------------------------------ */
844    .balign 4
845    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
846dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
847/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
848/* File: armv5te-vfp/funop.S */
849    /*
850     * float-to-int: loads the 32-bit source, converts it with ftosizs and
851     * stores the 32-bit result.  Expansion of the generic unary "s1 = op s0".
852     *
853     * For: float-to-int, int-to-float (this expansion is float-to-int)
854     *
855     * On entry:
856     *     r0 = target dalvik register address
857     *     r1 = src dalvik register address
858     */
859    /* unop vA, vB */
860    flds    s0, [r1]                    @ s0<- vB
861    ftosizs s1, s0                              @ s1<- op s0
862    fsts    s1, [r0]                    @ vA<- s1
863    bx      lr
864
865
866/* ------------------------------ */
867    .balign 4
868    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
869dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
870/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
871/* File: armv5te-vfp/funopWider.S */
872    /*
873     * int-to-double: loads the 32-bit source, converts it with fsitod and
874     * stores the 64-bit result.  Expansion of the generic widening "d0 = op s0".
875     *
876     * For: int-to-double, float-to-double (this expansion is int-to-double)
877     *
878     * On entry:
879     *     r0 = target dalvik register address
880     *     r1 = src dalvik register address
881     */
882    /* unop vA, vB */
883    flds    s0, [r1]                    @ s0<- vB
884    fsitod  d0, s0                              @ d0<- op s0
885    fstd    d0, [r0]                    @ vA<- d0
886    bx      lr
887
888
889/* ------------------------------ */
890    .balign 4
891    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
892dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
893/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
894/* File: armv5te-vfp/funop.S */
895    /*
896     * int-to-float: loads the 32-bit source, converts it with fsitos and
897     * stores the 32-bit result.  Expansion of the generic unary "s1 = op s0".
898     *
899     * For: float-to-int, int-to-float (this expansion is int-to-float)
900     *
901     * On entry:
902     *     r0 = target dalvik register address
903     *     r1 = src dalvik register address
904     */
905    /* unop vA, vB */
906    flds    s0, [r1]                    @ s0<- vB
907    fsitos  s1, s0                              @ s1<- op s0
908    fsts    s1, [r0]                    @ vA<- s1
909    bx      lr
910
911
912/* ------------------------------ */
913    .balign 4
914    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
915dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
916/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
917    /*
918     * Compare two double-precision values.  Returns 0, 1, or -1 in r0
919     * based on the results of the comparison ("g" variant: unordered gives 1).
920     *
921     * int compare(x, y) {
922     *     if (x == y) {
923     *         return 0;
924     *     } else if (x < y) {
925     *         return -1;
926     *     } else if (x > y) {
927     *         return 1;
928     *     } else {
929     *         return 1;        // unordered: neither ==, < nor > held
930     *     }
931     * }
932     *
933     * On entry:
934     *    r0 = &op1 [vBB]
935     *    r1 = &op2 [vCC]
936     */
937    /* op vAA, vBB, vCC */
938    fldd    d0, [r0]                    @ d0<- vBB
939    fldd    d1, [r1]                    @ d1<- vCC
940    fcmpd  d0, d1                       @ compare (vBB, vCC)
941    mov     r0, #1                      @ r0<- 1 (default)
942    fmstat                              @ export status flags
943    mvnmi   r0, #0                      @ (less than) r0<- -1
944    moveq   r0, #0                      @ (equal) r0<- 0
945    bx      lr
946
947/* ------------------------------ */
948    .balign 4
949    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
950dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
951/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
952    /*
953     * Compare two double-precision values.  Returns 0, 1, or -1 in r0
954     * based on the results of the comparison ("l" variant: unordered gives -1).
955     *
956     * int compare(x, y) {
957     *     if (x == y) {
958     *         return 0;
959     *     } else if (x > y) {
960     *         return 1;
961     *     } else if (x < y) {
962     *         return -1;
963     *     } else {
964     *         return -1;       // unordered: neither ==, > nor < held
965     *     }
966     * }
967     * On entry:
968     *    r0 = &op1 [vBB]
969     *    r1 = &op2 [vCC]
970     */
971    /* op vAA, vBB, vCC */
972    fldd    d0, [r0]                    @ d0<- vBB
973    fldd    d1, [r1]                    @ d1<- vCC
974    fcmped  d0, d1                      @ compare (vBB, vCC)
975    mvn     r0, #0                      @ r0<- -1 (default)
976    fmstat                              @ export status flags
977    movgt   r0, #1                      @ (greater than) r0<- 1
978    moveq   r0, #0                      @ (equal) r0<- 0
979    bx      lr
980
981/* ------------------------------ */
982    .balign 4
983    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
984dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
985/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
986    /*
987     * Compare two single-precision values.  Returns 0, 1, or -1 in r0
988     * based on the results of the comparison ("g" variant: unordered gives 1).
989     *
990     * int compare(x, y) {
991     *     if (x == y) {
992     *         return 0;
993     *     } else if (x < y) {
994     *         return -1;
995     *     } else if (x > y) {
996     *         return 1;
997     *     } else {
998     *         return 1;        // unordered: neither ==, < nor > held
999     *     }
1000     * }
1001     * On entry:
1002     *    r0 = &op1 [vBB]
1003     *    r1 = &op2 [vCC]
1004     */
1005    /* op vAA, vBB, vCC */
1006    flds    s0, [r0]                    @ s0<- vBB
1007    flds    s1, [r1]                    @ s1<- vCC
1008    fcmps  s0, s1                      @ compare (vBB, vCC)
1009    mov     r0, #1                      @ r0<- 1 (default)
1010    fmstat                              @ export status flags
1011    mvnmi   r0, #0                      @ (less than) r0<- -1
1012    moveq   r0, #0                      @ (equal) r0<- 0
1013    bx      lr
1014
1015/* ------------------------------ */
1016    .balign 4
1017    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
1018dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
1019/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
1020    /*
1021     * Compare two single-precision values.  Returns 0, 1, or -1 in r0
1022     * based on the results of the comparison ("l" variant: unordered gives -1).
1023     *
1024     * int compare(x, y) {
1025     *     if (x == y) {
1026     *         return 0;
1027     *     } else if (x > y) {
1028     *         return 1;
1029     *     } else if (x < y) {
1030     *         return -1;
1031     *     } else {
1032     *         return -1;       // unordered: neither ==, > nor < held
1033     *     }
1034     * }
1035     * On entry:
1036     *    r0 = &op1 [vBB]
1037     *    r1 = &op2 [vCC]
1038     */
1039    /* op vAA, vBB, vCC */
1040    flds    s0, [r0]                    @ s0<- vBB
1041    flds    s1, [r1]                    @ s1<- vCC
1042    fcmps  s0, s1                      @ compare (vBB, vCC)
1043    mvn     r0, #0                      @ r0<- -1 (default)
1044    fmstat                              @ export status flags
1045    movgt   r0, #1                      @ (greater than) r0<- 1
1046    moveq   r0, #0                      @ (equal) r0<- 0
1047    bx      lr
1048
1049/* ------------------------------ */
1050    .balign 4
1051    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
1052dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
1053/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
1054    /*
1055     * 64-bit floating point vfp sqrt operation.
1056     * If the result is a NaN (detected by comparing it with itself), tail-call
1057     * the library sqrt with the original operand in r0/r1 to do the right thing.
1058     *
1059     * On entry:
1060     *     r2 src addr of op1
1061     * On exit:
1062     *     r0,r1 = res
1063     */
1064    fldd    d0, [r2]                    /* d0<- operand */
1065    fsqrtd  d1, d0                      /* d1<- sqrt(d0) */
1066    fcmpd   d1, d1                      /* "equal" only if d1 is not a NaN */
1067    fmstat                              /* export status flags */
1068    fmrrd   r0, r1, d1                  /* r0/r1<- result */
1069    bxeq    lr   @ Result OK - return
1070    ldr     r2, .Lsqrt
1071    fmrrd   r0, r1, d0   @ reload orig operand
1072    bx      r2   @ tail call to sqrt library routine
1073
1074.Lsqrt:
1075    .word   sqrt
1076
1077/* ------------------------------ */
1078    .balign 4
1079    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
1080dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
1081/* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
1082    /*
1083     * Throw an exception from JIT'ed code: a one-instruction trampoline.
1084     * On entry:
1085     *    r0    Dalvik PC that raises the exception (passed through unchanged)
1086     */
1087    b       .LhandleException           /* common handler lives in the footer */
1088
1089/* ------------------------------ */
1090    .balign 4
1091    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
1092dvmCompiler_TEMPLATE_MEM_OP_DECODE:      /* has a body only under WITH_SELF_VERIFICATION */
1093/* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
1094#if defined(WITH_SELF_VERIFICATION)
1095    /*
1096     * This handler encapsulates heap memory ops for selfVerification mode.
1097     *
1098     * The call to the handler is inserted prior to a heap memory operation.
1099     * This handler then calls a function to decode the memory op, and process
1100     * it accordingly. Afterwards, the return address is expected to skip the
1101     * memory op (NOTE(review): presumably the callee patches the saved lr; confirm).
1102     */
1103    vpush   {d0-d15}                    @ save out all fp registers
1104    push    {r0-r12,lr}                 @ save out all registers
1105    mov     r0, lr                      @ arg0 <- link register
1106    mov     r1, sp                      @ arg1 <- stack pointer
1107    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
1108    blx     r2                          @ decode and handle the mem op
1109    pop     {r0-r12,lr}                 @ restore all registers
1110    vpop    {d0-d15}                    @ restore all fp registers
1111    bx      lr                          @ return to compiled code
1112#endif
1113
1114/* ------------------------------ */
1115    .balign 4
1116    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
1117dvmCompiler_TEMPLATE_STRING_COMPARETO:
1118/* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
1119    /*
1120     * String's compareTo.
1121     *
1122     * Requires r0/r1 to have been previously checked for null.  Will
1123     * return negative if this's string is < comp, 0 if they are the
1124     * same and positive if >.
1125     *
1126     * IMPORTANT NOTE:
1127     *
1128     * This code relies on hard-coded offsets for string objects, and must be
1129     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1130     *
1131     * On entry:
1132     *    r0:   this object pointer
1133     *    r1:   comp object pointer
1134     * On exit: r0 = result; r1-r4, r7-r12 are overwritten without being saved.
1135     */
1136
1137    mov    r2, r0         @ this to r2, opening up r0 for return value
1138    subs   r0, r2, r1     @ Same?
1139    bxeq   lr
1140
1141    ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]
1142    ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]
1143    ldr    r7, [r2, #STRING_FIELDOFF_COUNT]
1144    ldr    r10, [r1, #STRING_FIELDOFF_COUNT]
1145    ldr    r2, [r2, #STRING_FIELDOFF_VALUE]
1146    ldr    r1, [r1, #STRING_FIELDOFF_VALUE]
1147
1148    /*
1149     * At this point, we have:
1150     *    value:  r2/r1
1151     *    offset: r4/r9
1152     *    count:  r7/r10
1153     * We're going to compute
1154     *    r11 <- countDiff
1155     *    r10 <- minCount
1156     */
1157     subs  r11, r7, r10                 /* r11<- this.count - comp.count */
1158     movls r10, r7                      /* r10<- this.count when it is the smaller (or equal) one */
1159
1160     /* Now, build pointers to the string data */
1161     add   r2, r2, r4, lsl #1
1162     add   r1, r1, r9, lsl #1
1163     /*
1164      * Note: data pointers point to previous element so we can use pre-index
1165      * mode with base writeback.
1166      */
1167     add   r2, #16-2   @ offset to contents[-1]
1168     add   r1, #16-2   @ offset to contents[-1]
1169
1170     /*
1171      * At this point we have:
1172      *   r2: *this string data
1173      *   r1: *comp string data
1174      *   r10: iteration count for comparison
1175      *   r11: value to return if the first part of the string is equal
1176      *   r0: reserved for result
1177      *   r3, r4, r7, r8, r9, r12 available for loading string data
1178      */
1179
1180    subs  r10, #2
1181    blt   do_remainder2                 /* fewer than 2 chars to compare */
1182
1183      /*
1184       * Unroll the first two checks so we can quickly catch early mismatch
1185       * on long strings (but preserve incoming alignment)
1186       */
1187
1188    ldrh  r3, [r2, #2]!
1189    ldrh  r4, [r1, #2]!
1190    ldrh  r7, [r2, #2]!
1191    ldrh  r8, [r1, #2]!
1192    subs  r0, r3, r4
1193    subeqs  r0, r7, r8
1194    bxne  lr                            /* mismatch: r0 = difference of the chars */
1195    cmp   r10, #28
1196    bgt   do_memcmp16                   /* more than 28 chars left: use __memcmp16 */
1197    subs  r10, #3
1198    blt   do_remainder
1199
1200loopback_triple:
1201    ldrh  r3, [r2, #2]!
1202    ldrh  r4, [r1, #2]!
1203    ldrh  r7, [r2, #2]!
1204    ldrh  r8, [r1, #2]!
1205    ldrh  r9, [r2, #2]!
1206    ldrh  r12,[r1, #2]!
1207    subs  r0, r3, r4
1208    subeqs  r0, r7, r8
1209    subeqs  r0, r9, r12
1210    bxne  lr
1211    subs  r10, #3
1212    bge   loopback_triple
1213
1214do_remainder:
1215    adds  r10, #3                       /* undo the bias: r10<- chars still to compare */
1216    beq   returnDiff
1217
1218loopback_single:
1219    ldrh  r3, [r2, #2]!
1220    ldrh  r4, [r1, #2]!
1221    subs  r0, r3, r4
1222    bxne  lr
1223    subs  r10, #1
1224    bne     loopback_single
1225
1226returnDiff:
1227    mov   r0, r11                       /* common prefix equal: result is countDiff */
1228    bx    lr
1229
1230do_remainder2:
1231    adds  r10, #2                       /* undo the bias: r10<- 0 or 1 chars to compare */
1232    bne   loopback_single
1233    mov   r0, r11
1234    bx    lr
1235
1236    /* Long string case */
1237do_memcmp16:
1238    mov   r4, lr                        /* keep the return address across the call */
1239    ldr   lr, .Lmemcmp16
1240    mov   r7, r11                       /* keep countDiff across the call */
1241    add   r0, r2, #2                    /* r0<- next uncompared char of this */
1242    add   r1, r1, #2                    /* r1<- next uncompared char of comp */
1243    mov   r2, r10                       /* r2<- remaining count */
1244    blx   lr
1245    cmp   r0, #0
1246    bxne  r4                            /* non-zero __memcmp16 result is returned as-is */
1247    mov   r0, r7
1248    bx    r4
1249
1250.Lmemcmp16:
1251    .word __memcmp16
1252
1253/* ------------------------------ */
1254    .balign 4
1255    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
1256dvmCompiler_TEMPLATE_STRING_INDEXOF:
1257/* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
1258    /*
1259     * String's indexOf.
1260     *
1261     * Requires r0 to have been previously checked for null.  Will
1262     * return (in r0) the index of the match of r1 in r0, or -1 if none is found.
1263     *
1264     * IMPORTANT NOTE:
1265     *
1266     * This code relies on hard-coded offsets for string objects, and must be
1267     * kept in sync with definitions in UtfString.h  See asm-constants.h
1268     *
1269     * On entry:
1270     *    r0:   string object pointer
1271     *    r1:   char to match
1272     *    r2:   Starting offset in string data
1273     */
1274
1275    ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]
1276    ldr    r8, [r0, #STRING_FIELDOFF_COUNT]
1277    ldr    r0, [r0, #STRING_FIELDOFF_VALUE]
1278
1279    /*
1280     * At this point, we have:
1281     *    r0: pointer loaded from the string's value field
1282     *    r1: char to match
1283     *    r2: starting offset
1284     *    r7: offset
1285     *    r8: string length
1286     */
1287
1288     /* Build pointer to start of string data */
1289     add   r0, #16
1290     add   r0, r0, r7, lsl #1
1291
1292     /* Save a copy of starting data in r7 */
1293     mov   r7, r0
1294
1295     /* Clamp start to [0..count] */
1296     cmp   r2, #0
1297     movlt r2, #0
1298     cmp   r2, r8
1299     movgt r2, r8
1300
1301     /* Build pointer to start of data to compare and pre-bias */
1302     add   r0, r0, r2, lsl #1
1303     sub   r0, #2
1304
1305     /* Compute iteration count */
1306     sub   r8, r2
1307
1308     /*
1309      * At this point we have:
1310      *   r0: start of data to test
1311      *   r1: char to compare
1312      *   r8: iteration count
1313      *   r7: original start of string
1314      *   r3, r4, r9, r10, r11, r12 available for loading string data
1315      */
1316
1317    subs  r8, #4
1318    blt   indexof_remainder
1319
1320indexof_loop4:
1321    ldrh  r3, [r0, #2]!
1322    ldrh  r4, [r0, #2]!
1323    ldrh  r10, [r0, #2]!
1324    ldrh  r11, [r0, #2]!
1325    cmp   r3, r1
1326    beq   match_0
1327    cmp   r4, r1
1328    beq   match_1
1329    cmp   r10, r1
1330    beq   match_2
1331    cmp   r11, r1
1332    beq   match_3
1333    subs  r8, #4
1334    bge   indexof_loop4
1335
1336indexof_remainder:
1337    adds    r8, #4                      /* undo the bias: r8<- chars still to test */
1338    beq     indexof_nomatch
1339
1340indexof_loop1:
1341    ldrh  r3, [r0, #2]!
1342    cmp   r3, r1
1343    beq   match_3                       /* r0 already points at the matching char */
1344    subs  r8, #1
1345    bne   indexof_loop1
1346
1347indexof_nomatch:
1348    mov   r0, #-1
1349    bx    lr
1350
1351match_0:
1352    sub   r0, #6                        /* match was 3 chars before the last one loaded */
1353    sub   r0, r7
1354    asr   r0, r0, #1
1355    bx    lr
1356match_1:
1357    sub   r0, #4                        /* match was 2 chars before the last one loaded */
1358    sub   r0, r7
1359    asr   r0, r0, #1
1360    bx    lr
1361match_2:
1362    sub   r0, #2                        /* match was 1 char before the last one loaded */
1363    sub   r0, r7
1364    asr   r0, r0, #1
1365    bx    lr
1366match_3:
1367    sub   r0, r7                        /* byte distance from the start of string data */
1368    asr   r0, r0, #1                    /* bytes -> 16-bit char index */
1369    bx    lr
1370
1371/* ------------------------------ */
1372    .balign 4
1373    .global dvmCompiler_TEMPLATE_INTERPRET
1374dvmCompiler_TEMPLATE_INTERPRET:
1375/* File: armv5te/TEMPLATE_INTERPRET.S */
1376    /*
1377     * This handler transfers control to the interpreter without performing
1378     * any lookups.  It may be called either as part of a normal chaining
1379     * operation, or from the transition code in header.S.  We distinguish
1380     * the two cases by looking at the link register.  If called from a
1381     * translation chain, it will point to the chaining Dalvik PC -3.
1382     * On entry:
1383     *    lr - if NULL:
1384     *        r1 - the Dalvik PC to begin interpretation.
1385     *    else
1386     *        [lr, #3] contains Dalvik PC to begin interpretation
1387     *    rGLUE - pointer to interpState
1388     *    rFP - Dalvik frame pointer
1389     */
1390    cmp     lr, #0
1391#if defined(WORKAROUND_CORTEX_A9_745320)
1392    /* Don't use conditional loads if the HW defect exists */
1393    beq     101f
1394    ldr     r1,[lr, #3]
1395101:
1396#else
1397    ldrne   r1,[lr, #3]
1398#endif
1399    ldr     r2, .LinterpPunt
1400    mov     r0, r1                       @ set Dalvik PC
1401    bx      r2
1402    @ doesn't return
1403
1404.LinterpPunt:
1405    .word   dvmJitToInterpPunt
1406
1407/* ------------------------------ */
1408    .balign 4
1409    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
1410dvmCompiler_TEMPLATE_MONITOR_ENTER:
1411/* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
1412    /*
1413     * Call out to the runtime to lock an object.  Because this thread
1414     * may have been suspended in THREAD_MONITOR state and the Jit's
1415     * translation cache subsequently cleared, we cannot return directly.
1416     * Instead, unconditionally transition to the interpreter to resume.
1417     *
1418     * On entry:
1419     *    r0 - self pointer
1420     *    r1 - the object (which has already been null-checked by the caller)
1421     *    r4 - the Dalvik PC of the following instruction.
1422     */
1423    ldr     r2, .LdvmLockObject
1424    mov     r3, #0                       @ Record that we're not returning
1425    str     r3, [r0, #offThread_inJitCodeCache]
1426    blx     r2                           @ dvmLockObject(self, obj)
1427    @ refresh Jit's on/off status
1428    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1429    ldr     r0, [r0]
1430    ldr     r2, .LdvmJitToInterpNoChain
1431    str     r0, [rGLUE, #offGlue_pJitProfTable]
1432    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1433#if defined(WITH_JIT_TUNING)
1434    mov     r0, #kHeavyweightMonitor
1435#endif
1436    bx      r2
1437
1438/* ------------------------------ */
1439    .balign 4
1440    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
1441dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
1442/* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
1443    /*
1444     * To support deadlock prediction, this version of MONITOR_ENTER
1445     * will always call the heavyweight dvmLockObject, check for an
1446     * exception and then bail out to the interpreter.
1447     *
1448     * On entry:
1449     *    r0 - self pointer
1450     *    r1 - the object (which has already been null-checked by the caller)
1451     *    r4 - the Dalvik PC of the following instruction.
1452     * Never returns to the caller: exits via .LhandleException or the interpreter.
1453     */
1454    ldr     r2, .LdvmLockObject
1455    mov     r3, #0                       @ Record that we're not returning
1456    str     r3, [r0, #offThread_inJitCodeCache]
1457    blx     r2             @ dvmLockObject(self, obj)
1458    @ refresh Jit's on/off status & test for exception
1459    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1460    ldr     r1, [rGLUE, #offGlue_self]
1461    ldr     r0, [r0]
1462    ldr     r1, [r1, #offThread_exception]
1463    str     r0, [rGLUE, #offGlue_pJitProfTable]
1464    cmp     r1, #0
1465    beq     1f
1466    @ .LhandleException is a code label, not a literal word: branch, don't load
1467    sub     r0, r4, #2     @ roll dPC back to this monitor instruction
1468    b       .LhandleException
14691:
1470    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1471#if defined(WITH_JIT_TUNING)
1472    mov     r0, #kHeavyweightMonitor
1473#endif
1474    ldr     pc, .LdvmJitToInterpNoChain
1475
1476    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart  /* symbol size = bytes from dvmCompilerTemplateStart to here */
1477/* File: armv5te/footer.S */
1478/*
1479 * ===========================================================================
1480 *  Common subroutines and data
1481 * ===========================================================================
1482 */
1483
1484    .text
1485    .align  2
1486.LinvokeNative:
1487    @ Prep for the native call
1488    @ r1 = newFP, r0 = methodToCall
1489    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
1490    mov     r2, #0
1491    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
1492    str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
1493    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
1494    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
1495                                        @ newFp->localRefCookie=top
1496    mov     r9, r3                      @ r9<- glue->self (preserve)
1497    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
1498
1499    mov     r2, r0                      @ r2<- methodToCall
1500    mov     r0, r1                      @ r0<- newFP
1501    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
1502#if defined(WITH_INLINE_PROFILING)
1503    @ r2: methodToCall, r6: rGLUE
1504    stmfd   sp!, {r2,r6}                /* kept on the stack until after the native call */
1505    stmfd   sp!, {r0-r3}                /* preserve the native call's arguments */
1506    mov     r0, r2
1507    mov     r1, r6
1508    mov     lr, pc
1509    ldr     pc, .LdvmFastMethodTraceEnter
1510    ldmfd   sp!, {r0-r3}
1511#endif
1512
1513    mov     lr, pc                      /* lr<- address of the instruction after the ldr */
1514    ldr     pc, [r2, #offMethod_nativeFunc]  /* call methodToCall->nativeFunc with r0-r3 as set above */
1515
1516#if defined(WITH_INLINE_PROFILING)
1517    ldmfd   sp!, {r0-r1}                /* r0<- methodToCall, r1<- rGLUE (pushed above as r2,r6) */
1518    mov     lr, pc
1519    ldr     pc, .LdvmFastNativeMethodTraceExit
1520#endif
1521    @ Refresh Jit's on/off status
1522    ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
1523
1524    @ native return; r9=self, r10=newSaveArea
1525    @ equivalent to dvmPopJniLocals
1526    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
1527    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
1528    ldr     r1, [r9, #offThread_exception] @ check for exception
1529    ldr     r3, [r3]    @ r3 <- pointer to Jit profile table
1530    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
1531    cmp     r1, #0                      @ null?
1532    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
1533    ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
1534    str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
1535
1536    @ r0 = dalvikCallsitePC
1537    bne     .LhandleException           @ no, handle exception
1538
1539    str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
1540    cmp     r2, #0                      @ return chaining cell still exists?
1541    bxne    r2                          @ yes - go ahead
1542
1543    @ continue executing the next instruction through the interpreter
1544    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
1545    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
1546#if defined(WITH_JIT_TUNING)
1547    mov     r0, #kCallsiteInterpreted
1548#endif
1549    mov     pc, r1
1550
1551/*
1552 * Common exception exit: clears self->inJitCodeCache, loads rIBASE, sets rPC
1553 * from r0 (the faulting Dalvik PC) and jumps to dvmMterpCommonExceptionThrown.
1554 */
1555.LhandleException:
1556#if defined(WITH_SELF_VERIFICATION)
1557    ldr     pc, .LdeadFood @ should not see this under self-verification mode
1558.LdeadFood:
1559    .word   0xdeadf00d
1560#endif
1561    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
1562    mov     r2, #0
1563    str     r2, [r3, #offThread_inJitCodeCache] @ in interpreter land
1564    ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
1565    ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
1566    mov     rPC, r0                 @ reload the faulting Dalvik address
1567    mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
1568
1569    .align  2                           /* literal words below: each holds the address of an external symbol */
1570.LdvmAsmInstructionStart:
1571    .word   dvmAsmInstructionStart
1572.LdvmJitToInterpNoChainNoProfile:
1573    .word   dvmJitToInterpNoChainNoProfile
1574.LdvmJitToInterpTraceSelectNoChain:
1575    .word   dvmJitToInterpTraceSelectNoChain
1576.LdvmJitToInterpNoChain:
1577    .word   dvmJitToInterpNoChain
1578.LdvmMterpStdBail:
1579    .word   dvmMterpStdBail
1580.LdvmMterpCommonExceptionThrown:
1581    .word   dvmMterpCommonExceptionThrown
1582.LdvmLockObject:
1583    .word   dvmLockObject
1584#if defined(WITH_JIT_TUNING)
1585.LdvmICHitCount:
1586    .word   gDvmICHitCount
1587#endif
1588#if defined(WITH_SELF_VERIFICATION)
1589.LdvmSelfVerificationMemOpDecode:
1590    .word   dvmSelfVerificationMemOpDecode
1591#endif
1592#if defined(WITH_INLINE_PROFILING)
1593.LdvmFastMethodTraceEnter:
1594    .word   dvmFastMethodTraceEnter
1595.LdvmFastNativeMethodTraceExit:
1596    .word   dvmFastNativeMethodTraceExit
1597.LdvmFastJavaMethodTraceExit:
1598    .word   dvmFastJavaMethodTraceExit
1599#endif
1600.L__aeabi_cdcmple:
1601    .word   __aeabi_cdcmple
1602.L__aeabi_cfcmple:
1603    .word   __aeabi_cfcmple
1604
1605    .global dmvCompilerTemplateEnd      /* NOTE(review): spelled "dmv", presumably matching its external users; confirm before renaming */
1606dmvCompilerTemplateEnd:
1607
1608#endif /* WITH_JIT */
1609
1610