CompilerTemplateAsm-armv7-a-neon.S revision 5cc61d70ec727aa22f58463bf7940cc717cf3eb1
1/*
2 * This file was generated automatically by gen-template.py for 'armv7-a-neon'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: armv5te/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * ARMv5 definitions and declarations.
28 */
29
30/*
31ARM EABI general notes:
32
33r0-r3 hold first 4 args to a method; they are not preserved across method calls
34r4-r8 are available for general use
35r9 is given special treatment in some situations, but not for us
36r10 (sl) seems to be generally available
37r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
38r12 (ip) is scratch -- not preserved across method calls
39r13 (sp) should be managed carefully in case a signal arrives
40r14 (lr) must be preserved
41r15 (pc) can be tinkered with directly
42
43r0 holds returns of <= 4 bytes
44r0-r1 hold returns of 8 bytes, low word in r0
45
46Callee must save/restore r4+ (except r12) if it modifies them.
47
48Stack is "full descending".  Only the arguments that don't fit in the first 4
49registers are placed on the stack.  "sp" points at the first stacked argument
50(i.e. the 5th arg).
51
52VFP: single-precision results in s0, double-precision results in d0.
53
54In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
5564-bit quantities (long long, double) must be 64-bit aligned.
56*/
57
58/*
59JIT and ARM notes:
60
61The following registers have fixed assignments:
62
63  reg nick      purpose
64  r5  rFP       interpreted frame pointer, used for accessing locals and args
65  r6  rGLUE     MterpGlue pointer
66
67The following registers have fixed assignments in mterp but are scratch
68registers in compiled code
69
70  reg nick      purpose
71  r4  rPC       interpreted program counter, used for fetching instructions
72  r7  rINST     first 16-bit code unit of current instruction
73  r8  rIBASE    interpreted instruction base pointer, used for computed goto
74
75Macros are provided for common operations.  Each macro MUST emit only
76one instruction to make instruction-counting easier.  They MUST NOT alter
77unspecified registers or condition codes.
78*/
79
80/*
 * Single-purpose registers, given names for clarity.
 *
 * Per the "JIT and ARM notes" above, only rFP and rGLUE keep a fixed
 * assignment in compiled code; rPC, rINST and rIBASE are fixed in mterp
 * but are treated as scratch registers by compiled code.
 */
81#define rPC     r4
82#define rFP     r5
83#define rGLUE   r6
84#define rINST   r7
85#define rIBASE  r8
86
87/*
88 * Given a frame pointer, find the stack save area.
89 *
90 * In C this is "((StackSaveArea*)(_fp) -1)".
 *
 * _reg receives the result and _fpreg supplies the frame pointer; the
 * expansion is a single "sub" instruction.
91 */
92#define SAVEAREA_FROM_FP(_reg, _fpreg) \
93    sub     _reg, _fpreg, #sizeofStackSaveArea
94
/*
 * Store rPC into the currentPc slot of the stack save area located
 * immediately below the frame addressed by rFP.  The expansion is a
 * single "str" instruction.
 */
95#define EXPORT_PC() \
96    str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
97
98/*
99 * This is a #include, not a %include, because we want the C pre-processor
100 * to expand the macros into assembler assignment statements.
101 */
102#include "../../../mterp/common/asm-constants.h"
103
104/* File: armv5te-vfp/platform.S */
105/*
106 * ===========================================================================
107 *  CPU-version-specific defines and utility
108 * ===========================================================================
109 */
110
111/*
112 * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
113 * Jump to subroutine: "source" names a memory word that holds the target
 * address; that word is loaded directly into pc.
 *
 * In ARM state a read of pc yields the address of the current instruction
 * plus 8, so the "mov" leaves lr pointing at the instruction that follows
 * the "ldr".  lr is overwritten, so a caller that still needs its own lr
 * has to save it around the macro.
114 *
115 * May modify IP and LR.
 * NOTE(review): this expansion writes only lr (and pc).  IP is presumably
 * listed because other platform variants of the macro use it -- confirm.
116 */
117.macro  LDR_PC_LR source
118    mov     lr, pc
119    ldr     pc, \source
120.endm
121
122
123    .global dvmCompilerTemplateStart
124    .type   dvmCompilerTemplateStart, %function
125    .text
126
127dvmCompilerTemplateStart:
128
129/* ------------------------------ */
130    .balign 4
131    .global dvmCompiler_TEMPLATE_CMP_LONG
132dvmCompiler_TEMPLATE_CMP_LONG:
133/* File: armv5te/TEMPLATE_CMP_LONG.S */
134    /*
135     * Three-way signed comparison of two 64-bit integers.
136     *
137     * In:   r1:r0 = first operand  (high:low, vBB+1:vBB)
138     *       r3:r2 = second operand (high:low, vCC+1:vCC)
139     * Out:  r0 = -1, 0 or 1 when the first operand is less than, equal
140     *       to, or greater than the second.  Returns with "bx lr".
141     *
142     * The high words are compared as signed values.  Only when they are
143     * equal are the low words examined, and those are compared as
144     * unsigned values.
145     *
146     * A flags-only comparison would be "subs / sbcs / subeqs", but a
147     * specific integer has to be produced, so the multi-compare form is
148     * used: it finishes after two or three instructions plus a branch
149     * whenever the high words differ, and is slowest when the two values
150     * are equal.
151     */
154    /* cmp-long vAA, vBB, vCC */
155    cmp     r1, r3                      @ high words: vBB+1 against vCC+1
156    bgt     .LTEMPLATE_CMP_LONG_greater         @ signed greater: result is 1
157    blt     .LTEMPLATE_CMP_LONG_less            @ signed less: result is -1
158    subs    r0, r0, r2                  @ high words equal; r0<- r0 - r2
159    bxeq    lr                          @ low words equal as well: r0 is 0
160    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned higher on the low words
161.LTEMPLATE_CMP_LONG_less:
162    mvn     r0, #0                      @ r0<- -1
163    bx      lr
164.LTEMPLATE_CMP_LONG_greater:
165    mov     r0, #1                      @ r0<- 1
166    bx      lr
167
168/* ------------------------------ */
169    .balign 4
170    .global dvmCompiler_TEMPLATE_RETURN
171dvmCompiler_TEMPLATE_RETURN:
172/* File: armv5te/TEMPLATE_RETURN.S */
173    /*
174     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
175     * If the stored value in returnAddr
176     * is non-zero, the caller is compiled by the JIT thus return to the
177     * address in the code cache following the invoke instruction. Otherwise
178     * jump to the entry point loaded from .LdvmJitToInterpNoChainNoProfile.
     *
     * Other exits:
     *  - If the method slot of the frame being returned to is null (a
     *    break frame), rPC and rFP are stored at the start of the glue
     *    structure and control goes to the routine loaded from
     *    .LdvmMterpStdBail with r0 = rGLUE and r1 = 0 (self-verification
     *    builds use "blxeq lr" instead).
     *  - A non-zero suspend count clears the chaining-cell address, which
     *    forces the return through the interpreter entry point.
     *
     * Along the way glue->method, glue->methodClassDex, self->curFrame and
     * self->inJitCodeCache are updated for the frame being returned to.
     * Writes r0-r3, r8-r10, rPC and rFP.
179     */
180#if defined(WITH_INLINE_PROFILING)
181    stmfd   sp!, {r0-r2,lr}             @ preserve live registers
182    mov     r0, r6
183    @ r0=rGlue
184    LDR_PC_LR ".LdvmFastJavaMethodTraceExit"
185    ldmfd   sp!, {r0-r2,lr}             @ restore live registers
186#endif
187    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
188    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
189    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
190    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
191#if !defined(WITH_SELF_VERIFICATION)
192    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
193#else
194    mov     r9, #0                      @ disable chaining
195#endif
196    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
197                                        @ r2<- method we're returning to
198    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
199    cmp     r2, #0                      @ break frame?
200#if !defined(WITH_SELF_VERIFICATION)
201    beq     1f                          @ bail to interpreter
202#else
203    blxeq   lr                          @ punt to interpreter and compare state
204#endif
205    ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
206    mov     rFP, r10                    @ publish new FP
207    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz (flags still from "cmp r2, #0")
208    ldr     r8, [r8]                    @ r8<- suspendCount
209
210    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
211    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
212    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
213    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
214    str     r0, [rGLUE, #offGlue_methodClassDex]
215    cmp     r8, #0                      @ check the suspendCount
216    movne   r9, #0                      @ clear the chaining cell address
217    str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
218    cmp     r9, #0                      @ chaining cell exists?
219    blxne   r9                          @ jump to the chaining cell
220#if defined(WITH_JIT_TUNING)
221    mov     r0, #kCallsiteInterpreted
222#endif
223    mov     pc, r1                      @ callsite is interpreted
2241:
225    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
226    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
227    mov     r1, #0                      @ changeInterp = false
228    mov     r0, rGLUE                   @ Expecting rGLUE in r0
229    blx     r2                          @ exit the interpreter
230
231/* ------------------------------ */
232    .balign 4
233    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
234dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
235/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
236    /*
237     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
238     * into rPC then jump to the entry point loaded from
239     * .LdvmJitToInterpTraceSelectNoChain to dispatch the runtime-resolved callee.
     *
     * Exits:
     *  - "bx lr" when the new frame would drop below interpStackEnd, or
     *    when the suspend count is non-zero (in the latter case the new
     *    save area has already been filled in).
     *  - Native callees branch to .LinvokeNative (self-verification builds
     *    return through lr instead).
     *  - Otherwise glue->method, glue->methodClassDex and self->curFrame
     *    are switched to the callee and control leaves through r10.
     *
     * Writes r1-r3, r7-r10, rPC and rFP.
240     */
241    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
242    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
243    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
244    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
245    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
246    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
247    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
248    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
249    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
250    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
251    ldr     r8, [r8]                    @ r8<- suspendCount (int)
252    cmp     r10, r9                     @ bottom < interpStackEnd?
253    bxlo    lr                          @ return to raise stack overflow excep.
254    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
255    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
256    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
257    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
258    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
259    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
260
261
262    @ set up newSaveArea
263    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
264    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
265    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
266    cmp     r8, #0                      @ suspendCount != 0
267    bxne    lr                          @ bail to the interpreter
268    tst     r10, #ACC_NATIVE            @ is the callee a native method?
269#if !defined(WITH_SELF_VERIFICATION)
270    bne     .LinvokeNative
271#else
272    bxne    lr                          @ bail to the interpreter
273#endif
274
275    ldr     r10, .LdvmJitToInterpTraceSelectNoChain
276    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
277    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
278
279    @ Update "glue" values for the new method
280    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
281    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
282    mov     rFP, r1                         @ fp = newFp
283    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
284#if defined(WITH_INLINE_PROFILING)
285    stmfd   sp!, {r0-r3}                    @ preserve r0-r3
286    mov     r1, r6
287    @ r0=methodToCall, r1=rGlue
288    LDR_PC_LR ".LdvmFastMethodTraceEnter"
289    ldmfd   sp!, {r0-r3}                    @ restore r0-r3
290#endif
291
292    @ Start executing the callee
293#if defined(WITH_JIT_TUNING)
294    mov     r0, #kInlineCacheMiss
295#endif
296    mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
297
298/* ------------------------------ */
299    .balign 4
300    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
301dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
302/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
303    /*
304     * For monomorphic callsite, setup the Dalvik frame and return to the
305     * Thumb code through the link register to transfer control to the callee
306     * method through a dedicated chaining cell.
     *
     * Return protocol:
     *  - lr    : taken on success, after glue->method, glue->methodClassDex
     *            and self->curFrame have been switched to the callee.
     *  - lr + 2: the punt-to-interpreter address (held in r12), taken when
     *            the new frame would drop below interpStackEnd or when the
     *            suspend count is non-zero.
     *
     * Also entered at .LinvokeChain, which the predicted-chain template
     * branches to.  Writes r1-r3, r7-r10, r12, rPC and rFP.
307     */
308    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
309    @ methodToCall is guaranteed to be non-native
310.LinvokeChain:
311    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
312    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
313    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
314    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
315    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
316    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
317    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
318    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
319    add     r12, lr, #2                 @ setup the punt-to-interp address
320    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
321    ldr     r8, [r8]                    @ r8<- suspendCount (int)
322    cmp     r10, r9                     @ bottom < interpStackEnd?
323    bxlo    r12                         @ return to raise stack overflow excep.
324    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
325    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
326    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
327    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
328    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
329
330
331    @ set up newSaveArea
332    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
333    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
334    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
335    cmp     r8, #0                      @ suspendCount != 0
336    bxne    r12                         @ bail to the interpreter
337
338    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
339    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
340
341    @ Update "glue" values for the new method
342    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
343    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
344    mov     rFP, r1                         @ fp = newFp
345    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
346#if defined(WITH_INLINE_PROFILING)
347    stmfd   sp!, {r0-r2,lr}             @ preserve clobbered live registers
348    mov     r1, r6
349    @ r0=methodToCall, r1=rGlue
350    LDR_PC_LR ".LdvmFastMethodTraceEnter"
351    ldmfd   sp!, {r0-r2,lr}             @ restore registers
352#endif
353
354    bx      lr                              @ return to the callee-chaining cell
355
356/* ------------------------------ */
357    .balign 4
358    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
359dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
360/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
361    /*
362     * For polymorphic callsite, check whether the cached class pointer matches
363     * the current one. If so setup the Dalvik frame and return to the
364     * Thumb code through the link register to transfer control to the callee
365     * method through a dedicated chaining cell.
366     *
367     * The predicted chaining cell is declared in ArmLIR.h with the
368     * following layout:
369     *
370     *  typedef struct PredictedChainingCell {
371     *      u4 branch;
372     *      const ClassObject *clazz;
373     *      const Method *method;
374     *      u4 counter;
375     *  } PredictedChainingCell;
376     *
377     * Upon returning to the callsite:
378     *    - lr  : to branch to the chaining cell
379     *    - lr+2: to punt to the interpreter
380     *    - lr+4: to fully resolve the callee and may rechain.
381     *            r3 <- class
382     *            r9 <- counter
     *
     * On a class match the work is done by branching to .LinvokeChain
     * with r0 = predictedChainCell->method.  On a mismatch r1 is set to 0
     * when the cell's clazz is null, otherwise to the shared rechain count
     * minus one, which is also written back to the glue structure; r9
     * keeps the count as it was loaded.
383     */
384    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
385    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
386    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
387    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
388    ldr     r9, [rGLUE, #offGlue_icRechainCount]   @ r9 <- shared rechainCount
389    cmp     r3, r8          @ predicted class == actual class?
390#if defined(WITH_JIT_TUNING)
391    ldr     r7, .LdvmICHitCount
392    ldreq   r10, [r7, #0]   @ load the hit counter only on a match
393    add     r10, r10, #1    @ unconditional, but only stored on a match
394    streq   r10, [r7, #0]
395#endif
396    beq     .LinvokeChain   @ predicted chain is valid
397    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
398    cmp     r8, #0          @ initialized class or not
399    moveq   r1, #0          @ cell's clazz is null: count = 0
400    subne   r1, r9, #1      @ count--
401    strne   r1, [rGLUE, #offGlue_icRechainCount]   @ write back to InterpState
402    add     lr, lr, #4      @ return to fully-resolve landing pad
403    /*
404     * r1 <- count
405     * r2 <- &predictedChainCell
406     * r3 <- this->class
407     * r4 <- dPC
408     * r7 <- this->class->vtable
409     */
410    bx      lr
411
412/* ------------------------------ */
413    .balign 4
414    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
415dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
416/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    /*
     * Invoke a native method from compiled code: build the callee's stack
     * save area, call method->nativeFunc with r0 = newFP, r1 = &glue->retval,
     * r2 = methodToCall and r3 = glue->self, then undo the JNI local
     * reference cookie and resume.
     *
     * Exits:
     *  - "bx lr" when the new save area would drop below interpStackEnd or
     *    when the suspend count is non-zero (self-verification builds
     *    always return through lr before making the call).
     *  - .LhandleException, with r0 = dalvikCallsitePC, when
     *    self->exception is non-null after the call.
     *  - "bx r2" into the return chaining cell when the saved returnAddr
     *    is non-zero.
     *  - Otherwise through .LdvmJitToInterpTraceSelectNoChain with rPC
     *    advanced 6 bytes past the callsite.
     *
     * r9 (self) and r10 (new save area) are expected to survive the call
     * to the native function.
     */
417    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
418    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
419    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
420    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
421    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
422    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
423    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
424    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
425    ldr     r8, [r8]                    @ r8<- suspendCount (int)
426    cmp     r10, r9                     @ bottom < interpStackEnd?
427    bxlo    lr                          @ return to raise stack overflow excep.
428    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
429    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
430    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
431    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
432
433
434    @ set up newSaveArea
435    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
436    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
437    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
438    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
439    cmp     r8, #0                      @ suspendCount != 0
440    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc (flags untouched)
441#if !defined(WITH_SELF_VERIFICATION)
442    bxne    lr                          @ bail to the interpreter
443#else
444    bx      lr                          @ bail to interpreter unconditionally
445#endif
446
447    @ go ahead and transfer control to the native code
448    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
449    mov     r2, #0
450    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
451    str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
452    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
453                                        @ newFp->localRefCookie=top
454    mov     r9, r3                      @ r9<- glue->self (preserve)
455    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
456
457    mov     r2, r0                      @ r2<- methodToCall
458    mov     r0, r1                      @ r0<- newFP
459    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
460#if defined(WITH_INLINE_PROFILING)
461    @ r2=methodToCall, r6=rGLUE
462    stmfd   sp!, {r2,r6}                @ to be consumed after JNI return
463    stmfd   sp!, {r0-r3}                @ preserve r0-r3
464    mov     r0, r2
465    mov     r1, r6
466    @ r0=JNIMethod, r1=rGlue
467    LDR_PC_LR ".LdvmFastMethodTraceEnter"
468    ldmfd   sp!, {r0-r3}                @ restore r0-r3
469#endif
470
471    blx     r8                          @ off to the native code
472
473#if defined(WITH_INLINE_PROFILING)
474    ldmfd   sp!, {r0-r1}                @ pop the {r2,r6} pair saved above into r0,r1
475    @ r0=JNIMethod, r1=rGlue
476    LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
477#endif
478    @ native return; r9=self, r10=newSaveArea
479    @ equivalent to dvmPopJniLocals
480    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
481    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
482    ldr     r1, [r9, #offThread_exception] @ check for exception
483    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
484    cmp     r1, #0                      @ null?
485    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
486    ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
487
488    @ r0 = dalvikCallsitePC
489    bne     .LhandleException           @ exception is non-null: handle it
490
491    str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
492    cmp     r2, #0                      @ return chaining cell still exists?
493    bxne    r2                          @ yes - go ahead
494
495    @ continue executing the next instruction through the interpreter
496    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
497    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
498#if defined(WITH_JIT_TUNING)
499    mov     r0, #kCallsiteInterpreted
500#endif
501    mov     pc, r1
502
503/* ------------------------------ */
504    .balign 4
505    .global dvmCompiler_TEMPLATE_MUL_LONG
506dvmCompiler_TEMPLATE_MUL_LONG:
507/* File: armv5te/TEMPLATE_MUL_LONG.S */
508    /*
509     * 64-bit integer multiply, keeping the low 64 bits of the product.
510     *
511     * In:      r1:r0 = op1 (W:X), r3:r2 = op2 (Y:Z)
512     * Out:     r1:r0 = low 64 bits of op1 * op2
513     * Scratch: r2, r9, r10, ip
514     *
515     * Writing the operands as WX and YZ, schoolbook multiplication gives
516     *
517     *     low word  = low32(Z*X)
518     *     high word = high32(Z*X) + low32(Z*W) + low32(Y*X)
519     *
520     * Y*W only contributes to bits 64 and above, so it is never formed.
521     *
522     * The product is assembled in r9/r10 and copied to r0/r1 at the end
523     * because the inputs are still being read while it is built.  The
524     * operand order below also keeps Rd and Rm distinct, which the ARM
525     * multiply instructions require.
526     */
528    /* mul-long vAA, vBB, vCC */
529    umull   r9, r10, r0, r2             @ r10:r9<- X*Z (full 64-bit product)
530    mul     ip, r1, r2                  @ ip<- low32(W*Z)
531    mla     r2, r3, r0, ip              @ r2<- low32(Y*X) + low32(W*Z)
532    add     r10, r10, r2                @ r10<- high32(X*Z) + cross terms
533    mov     r0, r9                      @ result low word
534    mov     r1, r10                     @ result high word
535    bx      lr
536
537/* ------------------------------ */
538    .balign 4
539    .global dvmCompiler_TEMPLATE_SHL_LONG
540dvmCompiler_TEMPLATE_SHL_LONG:
541/* File: armv5te/TEMPLATE_SHL_LONG.S */
542    /*
543     * 64-bit shift left.  The value (vBB) is 64 bits wide in r1:r0 while
544     * the distance (vCC) is a 32-bit quantity in r2, of which Dalvik only
545     * honours the low 6 bits.  The result is returned in r1:r0; r2, r3
546     * and ip are overwritten.
547     */
548    /* shl-long vAA, vBB, vCC */
549    and     r2, r2, #63                 @ r2<- r2 & 0x3f
550    rsb     r3, r2, #32                 @ r3<- 32 - r2
551    mov     r1, r1, lsl r2              @ r1<- r1 << r2
552    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
553    subs    ip, r2, #32                 @ ip<- r2 - 32, flags for movpl
554    movpl   r1, r0, lsl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
555    mov     r0, r0, lsl r2              @ r0<- r0 << r2
556    bx      lr
557
558/* ------------------------------ */
559    .balign 4
560    .global dvmCompiler_TEMPLATE_SHR_LONG
561dvmCompiler_TEMPLATE_SHR_LONG:
562/* File: armv5te/TEMPLATE_SHR_LONG.S */
563    /*
564     * 64-bit arithmetic shift right.  The value (vBB) is 64 bits wide in
565     * r1:r0 while the distance (vCC) is a 32-bit quantity in r2, of which
566     * Dalvik only honours the low 6 bits.  The result is returned in
567     * r1:r0; r2, r3 and ip are overwritten.
568     */
569    /* shr-long vAA, vBB, vCC */
570    and     r2, r2, #63                 @ r2<- r2 & 0x3f
571    rsb     r3, r2, #32                 @ r3<- 32 - r2
572    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
573    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (r1 << (32-r2))
574    subs    ip, r2, #32                 @ ip<- r2 - 32, flags for movpl
575    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<- r1 >> (r2-32)
576    mov     r1, r1, asr r2              @ r1<- r1 >> r2
577    bx      lr
578
579/* ------------------------------ */
580    .balign 4
581    .global dvmCompiler_TEMPLATE_USHR_LONG
582dvmCompiler_TEMPLATE_USHR_LONG:
583/* File: armv5te/TEMPLATE_USHR_LONG.S */
584    /*
585     * 64-bit logical shift right.  The value (vBB) is 64 bits wide in
586     * r1:r0 while the distance (vCC) is a 32-bit quantity in r2, of which
587     * Dalvik only honours the low 6 bits.  The result is returned in
588     * r1:r0; r2, r3 and ip are overwritten.
589     */
590    /* ushr-long vAA, vBB, vCC */
591    and     r2, r2, #63                 @ r2<- r2 & 0x3f
592    rsb     r3, r2, #32                 @ r3<- 32 - r2
593    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
594    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (r1 << (32-r2))
595    subs    ip, r2, #32                 @ ip<- r2 - 32, flags for movpl
596    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<- r1 >>> (r2-32)
597    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
598    bx      lr
599
600/* ------------------------------ */
601    .balign 4
602    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
603dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
604/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
605/* File: armv5te-vfp/fbinop.S */
606    /*
607     * Single-precision add through memory: *r0 = *r1 + *r2.
608     *
609     * In:
610     *     r0 = address of the destination Dalvik register
611     *     r1 = address of the first operand
612     *     r2 = address of the second operand
613     * Uses s0-s2 as scratch and returns with "bx lr".
614     */
615     vldr     s0, [r1]                   @ s0<- op1
616     vldr     s1, [r2]                   @ s1<- op2
617     vadd.f32 s2, s0, s1                 @ s2<- s0 + s1
618     vstr     s2, [r0]                   @ store the result
619     bx       lr
620
621
622/* ------------------------------ */
623    .balign 4
624    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
625dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
626/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
627/* File: armv5te-vfp/fbinop.S */
628    /*
629     * Single-precision subtract through memory: *r0 = *r1 - *r2.
630     *
631     * In:
632     *     r0 = address of the destination Dalvik register
633     *     r1 = address of the first operand
634     *     r2 = address of the second operand
635     * Uses s0-s2 as scratch and returns with "bx lr".
636     */
637     vldr     s0, [r1]                   @ s0<- op1
638     vldr     s1, [r2]                   @ s1<- op2
639     vsub.f32 s2, s0, s1                 @ s2<- s0 - s1
640     vstr     s2, [r0]                   @ store the result
641     bx       lr
642
643
644/* ------------------------------ */
645    .balign 4
646    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
647dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
648/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
649/* File: armv5te-vfp/fbinop.S */
650    /*
651     * Single-precision multiply through memory: *r0 = *r1 * *r2.
652     *
653     * In:
654     *     r0 = address of the destination Dalvik register
655     *     r1 = address of the first operand
656     *     r2 = address of the second operand
657     * Uses s0-s2 as scratch and returns with "bx lr".
658     */
659     vldr     s0, [r1]                   @ s0<- op1
660     vldr     s1, [r2]                   @ s1<- op2
661     vmul.f32 s2, s0, s1                 @ s2<- s0 * s1
662     vstr     s2, [r0]                   @ store the result
663     bx       lr
664
665
666/* ------------------------------ */
667    .balign 4
668    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
669dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
670/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
671/* File: armv5te-vfp/fbinop.S */
672    /*
673     * Single-precision divide through memory: *r0 = *r1 / *r2.
674     *
675     * In:
676     *     r0 = address of the destination Dalvik register
677     *     r1 = address of the dividend
678     *     r2 = address of the divisor
679     * Uses s0-s2 as scratch and returns with "bx lr".
680     */
681     vldr     s0, [r1]                   @ s0<- op1
682     vldr     s1, [r2]                   @ s1<- op2
683     vdiv.f32 s2, s0, s1                 @ s2<- s0 / s1
684     vstr     s2, [r0]                   @ store the result
685     bx       lr
686
687
688/* ------------------------------ */
689    .balign 4
690    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
691dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
692/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
693/* File: armv5te-vfp/fbinopWide.S */
694    /*
695     * Double-precision add through memory: *r0 = *r1 + *r2.
696     *
697     * In:
698     *     r0 = address of the destination Dalvik register pair
699     *     r1 = address of the first operand
700     *     r2 = address of the second operand
701     * Uses d0-d2 as scratch and returns with "bx lr".
702     */
703     vldr     d0, [r1]                   @ d0<- op1
704     vldr     d1, [r2]                   @ d1<- op2
705     vadd.f64 d2, d0, d1                 @ d2<- d0 + d1
706     vstr     d2, [r0]                   @ store the result
707     bx       lr
708
709
710/* ------------------------------ */
711    .balign 4
712    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
713dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
714/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
715/* File: armv5te-vfp/fbinopWide.S */
716    /*
717     * Double-precision subtract through memory: *r0 = *r1 - *r2.
718     *
719     * In:
720     *     r0 = address of the destination Dalvik register pair
721     *     r1 = address of the first operand
722     *     r2 = address of the second operand
723     * Uses d0-d2 as scratch and returns with "bx lr".
724     */
725     vldr     d0, [r1]                   @ d0<- op1
726     vldr     d1, [r2]                   @ d1<- op2
727     vsub.f64 d2, d0, d1                 @ d2<- d0 - d1
728     vstr     d2, [r0]                   @ store the result
729     bx       lr
730
731
732/* ------------------------------ */
733    .balign 4
734    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
735dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
736/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
737/* File: armv5te-vfp/fbinopWide.S */
738    /*
739     * Double-precision multiply through memory: *r0 = *r1 * *r2.
740     *
741     * In:
742     *     r0 = address of the destination Dalvik register pair
743     *     r1 = address of the first operand
744     *     r2 = address of the second operand
745     * Uses d0-d2 as scratch and returns with "bx lr".
746     */
747     vldr     d0, [r1]                   @ d0<- op1
748     vldr     d1, [r2]                   @ d1<- op2
749     vmul.f64 d2, d0, d1                 @ d2<- d0 * d1
750     vstr     d2, [r0]                   @ store the result
751     bx       lr
752
753
754/* ------------------------------ */
755    .balign 4
756    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
757dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
758/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
759/* File: armv5te-vfp/fbinopWide.S */
760    /*
761     * Double-precision divide through memory: *r0 = *r1 / *r2.
762     *
763     * In:
764     *     r0 = address of the destination Dalvik register pair
765     *     r1 = address of the dividend
766     *     r2 = address of the divisor
767     * Uses d0-d2 as scratch and returns with "bx lr".
768     */
769     vldr     d0, [r1]                   @ d0<- op1
770     vldr     d1, [r2]                   @ d1<- op2
771     vdiv.f64 d2, d0, d1                 @ d2<- d0 / d1
772     vstr     d2, [r0]                   @ store the result
773     bx       lr
774
775
776/* ------------------------------ */
777    .balign 4
778    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
779dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
780/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
781/* File: armv5te-vfp/funopNarrower.S */
782    /*
783     * Generic 64bit-to-32bit floating point unary operation.  Provide an
784     * "instr" line that specifies an instruction that performs "s0 = op d0".
785     *
786     * For: double-to-int, double-to-float
787     *
788     * On entry:
789     *     r0 = target dalvik register address
790     *     r1 = src dalvik register address
791     */
792    /* unop vA, vB: vA<- (float) vB */
793    fldd    d0, [r1]                    @ d0<- vB (64-bit source)
794    fcvtsd  s0, d0                              @ s0<- d0 converted to single precision
795    fsts    s0, [r0]                    @ vA<- s0 (32-bit result)
796    bx      lr                          @ return to compiled code
797
798
799/* ------------------------------ */
800    .balign 4
801    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
802dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
803/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
804/* File: armv5te-vfp/funopNarrower.S */
805    /*
806     * Generic 64bit-to-32bit floating point unary operation.  Provide an
807     * "instr" line that specifies an instruction that performs "s0 = op d0".
808     *
809     * For: double-to-int, double-to-float
810     *
811     * On entry:
812     *     r0 = target dalvik register address
813     *     r1 = src dalvik register address
814     */
815    /* unop vA, vB: vA<- (int) vB */
816    fldd    d0, [r1]                    @ d0<- vB (64-bit source)
817    ftosizd  s0, d0                              @ s0<- d0 as signed int, rounding toward zero
818    fsts    s0, [r0]                    @ vA<- s0 (32-bit integer bits)
819    bx      lr                          @ return to compiled code
820
821
822/* ------------------------------ */
823    .balign 4
824    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
825dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
826/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
827/* File: armv5te-vfp/funopWider.S */
828    /*
829     * Generic 32bit-to-64bit floating point unary operation.  Provide an
830     * "instr" line that specifies an instruction that performs "d0 = op s0".
831     *
832     * For: int-to-double, float-to-double
833     *
834     * On entry:
835     *     r0 = target dalvik register address
836     *     r1 = src dalvik register address
837     */
838    /* unop vA, vB: vA<- (double) vB */
839    flds    s0, [r1]                    @ s0<- vB (32-bit source)
840    fcvtds  d0, s0                              @ d0<- s0 converted to double precision
841    fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
842    bx      lr                          @ return to compiled code
843
844
845/* ------------------------------ */
846    .balign 4
847    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
848dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
849/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
850/* File: armv5te-vfp/funop.S */
851    /*
852     * Generic 32bit-to-32bit floating point unary operation.  Provide an
853     * "instr" line that specifies an instruction that performs "s1 = op s0".
854     *
855     * For: float-to-int, int-to-float
856     *
857     * On entry:
858     *     r0 = target dalvik register address
859     *     r1 = src dalvik register address
860     */
861    /* unop vA, vB: vA<- (int) vB */
862    flds    s0, [r1]                    @ s0<- vB
863    ftosizs s1, s0                              @ s1<- s0 as signed int, rounding toward zero
864    fsts    s1, [r0]                    @ vA<- s1 (32-bit integer bits)
865    bx      lr                          @ return to compiled code
866
867
868/* ------------------------------ */
869    .balign 4
870    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
871dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
872/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
873/* File: armv5te-vfp/funopWider.S */
874    /*
875     * Generic 32bit-to-64bit floating point unary operation.  Provide an
876     * "instr" line that specifies an instruction that performs "d0 = op s0".
877     *
878     * For: int-to-double, float-to-double
879     *
880     * On entry:
881     *     r0 = target dalvik register address
882     *     r1 = src dalvik register address
883     */
884    /* unop vA, vB: vA<- (double) vB */
885    flds    s0, [r1]                    @ s0<- vB (32-bit integer bits)
886    fsitod  d0, s0                              @ d0<- signed int in s0 converted to double
887    fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
888    bx      lr                          @ return to compiled code
889
890
891/* ------------------------------ */
892    .balign 4
893    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
894dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
895/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
896/* File: armv5te-vfp/funop.S */
897    /*
898     * Generic 32bit-to-32bit floating point unary operation.  Provide an
899     * "instr" line that specifies an instruction that performs "s1 = op s0".
900     *
901     * For: float-to-int, int-to-float
902     *
903     * On entry:
904     *     r0 = target dalvik register address
905     *     r1 = src dalvik register address
906     */
907    /* unop vA, vB: vA<- (float) vB */
908    flds    s0, [r1]                    @ s0<- vB (32-bit integer bits)
909    fsitos  s1, s0                              @ s1<- signed int in s0 converted to float
910    fsts    s1, [r0]                    @ vA<- s1
911    bx      lr                          @ return to compiled code
912
913
914/* ------------------------------ */
915    .balign 4
916    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
917dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
918/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
919    /*
920     * Compare two double-precision values.  Puts 0, 1, or -1 into r0
921     * based on the results of the comparison.
922     *
923     * int compare(x, y) {
924     *     if (x == y) {
925     *         return 0;
926     *     } else if (x < y) {
927     *         return -1;
928     *     } else if (x > y) {
929     *         return 1;
930     *     } else {
931     *         return 1;    (unordered: the default value is kept)
932     *     }
933     * }
934     *
935     * On entry:
936     *    r0 = &op1 [vBB]
937     *    r1 = &op2 [vCC]
938     */
939    /* op vAA, vBB, vCC */
940    fldd    d0, [r0]                    @ d0<- vBB
941    fldd    d1, [r1]                    @ d1<- vCC
942    fcmpd  d0, d1                       @ compare (vBB, vCC)
943    mov     r0, #1                      @ r0<- 1 (default); mov leaves the flags alone
944    fmstat                              @ export status flags
945    mvnmi   r0, #0                      @ (less than) r0<- -1
946    moveq   r0, #0                      @ (equal) r0<- 0
947    bx      lr                          @ return result in r0
948
949/* ------------------------------ */
950    .balign 4
951    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
952dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
953/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
954    /*
955     * Compare two double-precision values.  Puts 0, 1, or -1 into r0
956     * based on the results of the comparison.
957     *
958     * int compare(x, y) {
959     *     if (x == y) {
960     *         return 0;
961     *     } else if (x > y) {
962     *         return 1;
963     *     } else if (x < y) {
964     *         return -1;
965     *     } else {
966     *         return -1;   (unordered: the default value is kept)
967     *     }
968     * }
969     * On entry:
970     *    r0 = &op1 [vBB]
971     *    r1 = &op2 [vCC]
972     */
973    /* op vAA, vBB, vCC */
974    fldd    d0, [r0]                    @ d0<- vBB
975    fldd    d1, [r1]                    @ d1<- vCC
976    fcmpd   d0, d1                      @ quiet compare (vBB, vCC), matching the other compare templates
977    mvn     r0, #0                      @ r0<- -1 (default); mvn leaves the flags alone
978    fmstat                              @ export status flags
979    movgt   r0, #1                      @ (greater than) r0<- 1
980    moveq   r0, #0                      @ (equal) r0<- 0
981    bx      lr                          @ return result in r0
982
983/* ------------------------------ */
984    .balign 4
985    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
986dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
987/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
988    /*
989     * Compare two single-precision values.  Puts 0, 1, or -1 into r0
990     * based on the results of the comparison.
991     *
992     * int compare(x, y) {
993     *     if (x == y) {
994     *         return 0;
995     *     } else if (x < y) {
996     *         return -1;
997     *     } else if (x > y) {
998     *         return 1;
999     *     } else {
1000     *         return 1;    (unordered: the default value is kept)
1001     *     }
1002     * }
1003     * On entry:
1004     *    r0 = &op1 [vBB]
1005     *    r1 = &op2 [vCC]
1006     */
1007    /* op vAA, vBB, vCC */
1008    flds    s0, [r0]                    @ s0<- vBB
1009    flds    s1, [r1]                    @ s1<- vCC
1010    fcmps  s0, s1                      @ compare (vBB, vCC)
1011    mov     r0, #1                      @ r0<- 1 (default); mov leaves the flags alone
1012    fmstat                              @ export status flags
1013    mvnmi   r0, #0                      @ (less than) r0<- -1
1014    moveq   r0, #0                      @ (equal) r0<- 0
1015    bx      lr                          @ return result in r0
1016
1017/* ------------------------------ */
1018    .balign 4
1019    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
1020dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
1021/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
1022    /*
1023     * Compare two single-precision values.  Puts 0, 1, or -1 into r0
1024     * based on the results of the comparison.
1025     *
1026     * int compare(x, y) {
1027     *     if (x == y) {
1028     *         return 0;
1029     *     } else if (x > y) {
1030     *         return 1;
1031     *     } else if (x < y) {
1032     *         return -1;
1033     *     } else {
1034     *         return -1;   (unordered: the default value is kept)
1035     *     }
1036     * }
1037     * On entry:
1038     *    r0 = &op1 [vBB]
1039     *    r1 = &op2 [vCC]
1040     */
1041    /* op vAA, vBB, vCC */
1042    flds    s0, [r0]                    @ s0<- vBB
1043    flds    s1, [r1]                    @ s1<- vCC
1044    fcmps  s0, s1                      @ compare (vBB, vCC)
1045    mvn     r0, #0                      @ r0<- -1 (default); mvn leaves the flags alone
1046    fmstat                              @ export status flags
1047    movgt   r0, #1                      @ (greater than) r0<- 1
1048    moveq   r0, #0                      @ (equal) r0<- 0
1049    bx      lr                          @ return result in r0
1050
1051/* ------------------------------ */
1052    .balign 4
1053    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
1054dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
1055/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
1056    /*
1057     * 64-bit floating point vfp sqrt operation.
1058     * If the result is a NaN, bail out to library code (sqrt) to do
1059     * the right thing; that call returns straight to our caller.
1060     *
1061     * On entry:
1062     *     r2 src addr of op1
1063     * On exit:
1064     *     r0,r1 = res
1065     */
1066    fldd    d0, [r2]     @ d0<- op1
1067    fsqrtd  d1, d0       @ d1<- sqrt(d0)
1068    fcmpd   d1, d1       @ a value compares equal to itself unless it is NaN
1069    fmstat               @ export status flags
1070    fmrrd   r0, r1, d1   @ r0/r1<- result (fmrrd does not touch the flags)
1071    bxeq    lr   @ Result OK - return
1072    ldr     r2, .Lsqrt   @ r2<- address of the sqrt library routine
1073    fmrrd   r0, r1, d0   @ reload orig operand
1074    bx      r2   @ tail call to sqrt library routine
1075
1076.Lsqrt:
1077    .word   sqrt
1078
1079/* ------------------------------ */
1080    .balign 4
1081    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
1082dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
1083/* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
1084    /*
1085     * Throw an exception from JIT'ed code.  Does not return to the caller.
1086     * On entry:
1087     *    r0    Dalvik PC that raises the exception
1088     */
1089    b       .LhandleException           @ shared handler in footer.S; r0 is passed through
1090
1091/* ------------------------------ */
1092    .balign 4
1093    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
1094dvmCompiler_TEMPLATE_MEM_OP_DECODE:
1095/* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
1096#if defined(WITH_SELF_VERIFICATION)
1097    /*
1098     * This handler encapsulates heap memory ops for selfVerification mode.
1099     * Without WITH_SELF_VERIFICATION the label above has no body at all.
1100     * The call to the handler is inserted prior to a heap memory operation.
1101     * This handler then calls a function to decode the memory op, and process
1102     * it accordingly. Afterwards, the handler changes the return address to
1103     * skip the memory op so it never gets executed.
1104     */
1105    vpush   {d0-d15}                    @ save out all fp registers
1106    push    {r0-r12,lr}                 @ save out all registers
1107    mov     r0, lr                      @ arg0 <- link register
1108    mov     r1, sp                      @ arg1 <- stack pointer (base of the saved registers)
1109    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
1110    blx     r2                          @ decode and handle the mem op
1111    pop     {r0-r12,lr}                 @ restore all registers; lr comes from the saved slot
1112    vpop    {d0-d15}                    @ restore all fp registers
1113    bx      lr                          @ return to compiled code
1114#endif
1115
1116/* ------------------------------ */
1117    .balign 4
1118    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
1119dvmCompiler_TEMPLATE_STRING_COMPARETO:
1120/* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
1121    /*
1122     * String's compareTo.
1123     *
1124     * Requires r0/r1 to have been previously checked for null.  Will
1125     * return negative if this's string is < comp, 0 if they are the
1126     * same and positive if >.
1127     *
1128     * IMPORTANT NOTE:
1129     *
1130     * This code relies on hard-coded offsets for string objects, and must be
1131     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1132     *
1133     * On entry:
1134     *    r0:   this object pointer
1135     *    r1:   comp object pointer
1136     * On exit:
1137     *    r0:   result; r1-r4 and r7-r12 are overwritten and not restored
1138     */
1139
1140    mov    r2, r0         @ this to r2, opening up r0 for return value
1141    subs   r0, r2, r1     @ Same?
1142    bxeq   lr             @ identical objects: return 0 (already in r0)
1143
1144    ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]    @ r4<- this.offset
1145    ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]    @ r9<- comp.offset
1146    ldr    r7, [r2, #STRING_FIELDOFF_COUNT]     @ r7<- this.count
1147    ldr    r10, [r1, #STRING_FIELDOFF_COUNT]    @ r10<- comp.count
1148    ldr    r2, [r2, #STRING_FIELDOFF_VALUE]     @ r2<- this.value
1149    ldr    r1, [r1, #STRING_FIELDOFF_VALUE]     @ r1<- comp.value
1150
1151    /*
1152     * At this point, we have:
1153     *    value:  r2/r1
1154     *    offset: r4/r9
1155     *    count:  r7/r10
1156     * We're going to compute
1157     *    r11 <- countDiff
1158     *    r10 <- minCount
1159     */
1160     subs  r11, r7, r10   @ r11<- this.count - comp.count
1161     movls r10, r7        @ unsigned lower-or-same: this.count is the minimum
1162
1163     /* Now, build pointers to the string data */
1164     add   r2, r2, r4, lsl #1   @ 2 bytes per char
1165     add   r1, r1, r9, lsl #1
1166     /*
1167      * Note: data pointers point to previous element so we can use pre-index
1168      * mode with base writeback.
1169      */
1170     add   r2, #16-2   @ offset to contents[-1]
1171     add   r1, #16-2   @ offset to contents[-1]
1172
1173     /*
1174      * At this point we have:
1175      *   r2: *this string data
1176      *   r1: *comp string data
1177      *   r10: iteration count for comparison
1178      *   r11: value to return if the first part of the string is equal
1179      *   r0: reserved for result
1180      *   r3, r4, r7, r8, r9, r12 available for loading string data
1181      */
1182
1183    subs  r10, #2
1184    blt   do_remainder2   @ fewer than 2 chars to compare
1185
1186      /*
1187       * Unroll the first two checks so we can quickly catch early mismatch
1188       * on long strings (but preserve incoming alignment)
1189       */
1190
1191    ldrh  r3, [r2, #2]!
1192    ldrh  r4, [r1, #2]!
1193    ldrh  r7, [r2, #2]!
1194    ldrh  r8, [r1, #2]!
1195    subs  r0, r3, r4      @ difference of first pair
1196    subeqs  r0, r7, r8    @ only if equal so far: difference of second pair
1197    bxne  lr              @ mismatch: return the char difference in r0
1198    cmp   r10, #28
1199    bgt   do_memcmp16     @ more than 28 chars remain: use the library routine
1200    subs  r10, #3
1201    blt   do_remainder
1202
1203loopback_triple:
1204    ldrh  r3, [r2, #2]!
1205    ldrh  r4, [r1, #2]!
1206    ldrh  r7, [r2, #2]!
1207    ldrh  r8, [r1, #2]!
1208    ldrh  r9, [r2, #2]!
1209    ldrh  r12,[r1, #2]!
1210    subs  r0, r3, r4
1211    subeqs  r0, r7, r8    @ each later pair is checked only while r0 == 0
1212    subeqs  r0, r9, r12
1213    bxne  lr              @ mismatch: return the char difference in r0
1214    subs  r10, #3
1215    bge   loopback_triple
1216
1217do_remainder:
1218    adds  r10, #3         @ undo the bias: r10<- chars left (0..2)
1219    beq   returnDiff
1220
1221loopback_single:
1222    ldrh  r3, [r2, #2]!
1223    ldrh  r4, [r1, #2]!
1224    subs  r0, r3, r4
1225    bxne  lr              @ mismatch: return the char difference in r0
1226    subs  r10, #1
1227    bne     loopback_single
1228
1229returnDiff:
1230    mov   r0, r11         @ common prefix equal: return countDiff
1231    bx    lr
1232
1233do_remainder2:
1234    adds  r10, #2         @ undo the bias: r10<- chars left (0..1)
1235    bne   loopback_single
1236    mov   r0, r11         @ nothing to compare: return countDiff
1237    bx    lr
1238
1239    /* Long string case */
1240do_memcmp16:
1241    mov   r4, lr          @ keep the return address in r4 across the call
1242    ldr   lr, .Lmemcmp16  @ lr<- address of __memcmp16
1243    mov   r7, r11         @ keep countDiff in r7 across the call
1244    add   r0, r2, #2      @ arg0<- next uncompared char of this
1245    add   r1, r1, #2      @ arg1<- next uncompared char of comp
1246    mov   r2, r10         @ arg2<- remaining count
1247    blx   lr
1248    cmp   r0, #0
1249    bxne  r4              @ nonzero result from __memcmp16: return it
1250    mov   r0, r7          @ otherwise return countDiff
1251    bx    r4
1252
1253.Lmemcmp16:
1254    .word __memcmp16
1254
1255/* ------------------------------ */
1256    .balign 4
1257    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
1258dvmCompiler_TEMPLATE_STRING_INDEXOF:
1259/* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
1260    /*
1261     * String's indexOf.
1262     *
1263     * Requires r0 to have been previously checked for null.  Will
1264     * return index of match of r1 in r0, or -1 if there is no match.
1265     *
1266     * IMPORTANT NOTE:
1267     *
1268     * This code relies on hard-coded offsets for string objects, and must be
1269     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1270     *
1271     * On entry:
1272     *    r0:   string object pointer
1273     *    r1:   char to match
1274     *    r2:   Starting offset in string data
1275     */
1276
1277    ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]
1278    ldr    r8, [r0, #STRING_FIELDOFF_COUNT]
1279    ldr    r0, [r0, #STRING_FIELDOFF_VALUE]
1280
1281    /*
1282     * At this point, we have:
1283     *    r0: value (char array) pointer
1284     *    r1: char to match
1285     *    r2: starting offset
1286     *    r7: offset
1287     *    r8: string length
1288     */
1289
1290     /* Build pointer to start of string data */
1291     add   r0, #16
1292     add   r0, r0, r7, lsl #1
1293
1294     /* Save a copy of starting data in r7 */
1295     mov   r7, r0
1296
1297     /* Clamp start to [0..count] */
1298     cmp   r2, #0
1299     movlt r2, #0
1300     cmp   r2, r8
1301     movgt r2, r8
1302
1303     /* Build pointer to start of data to compare and pre-bias */
1304     add   r0, r0, r2, lsl #1
1305     sub   r0, #2
1306
1307     /* Compute iteration count */
1308     sub   r8, r2
1309
1310     /*
1311      * At this point we have:
1312      *   r0: start of data to test (biased by -2 for pre-indexed loads)
1313      *   r1: char to compare
1314      *   r8: iteration count
1315      *   r7: original start of string
1316      *   r3, r4, r9, r10, r11, r12 available for loading string data
1317      */
1318
1319    subs  r8, #4
1320    blt   indexof_remainder     @ fewer than 4 chars left
1321
1322indexof_loop4:
1323    ldrh  r3, [r0, #2]!
1324    ldrh  r4, [r0, #2]!
1325    ldrh  r10, [r0, #2]!
1326    ldrh  r11, [r0, #2]!        @ r0 now points at the 4th char of this group
1327    cmp   r3, r1
1328    beq   match_0
1329    cmp   r4, r1
1330    beq   match_1
1331    cmp   r10, r1
1332    beq   match_2
1333    cmp   r11, r1
1334    beq   match_3
1335    subs  r8, #4
1336    bge   indexof_loop4
1337
1338indexof_remainder:
1339    adds    r8, #4              @ undo the bias: r8<- chars left (0..3)
1340    beq     indexof_nomatch
1341
1342indexof_loop1:
1343    ldrh  r3, [r0, #2]!
1344    cmp   r3, r1
1345    beq   match_3               @ r0 points at the matching char, same as match_3
1346    subs  r8, #1
1347    bne   indexof_loop1
1348
1349indexof_nomatch:
1350    mov   r0, #-1
1351    bx    lr
1352
1353match_0:
1354    sub   r0, #6                @ back up 3 chars to the first char of the group
1355    sub   r0, r7                @ byte distance from the start of the string
1356    asr   r0, r0, #1            @ bytes to char index
1357    bx    lr
1358match_1:
1359    sub   r0, #4                @ back up 2 chars
1360    sub   r0, r7
1361    asr   r0, r0, #1
1362    bx    lr
1363match_2:
1364    sub   r0, #2                @ back up 1 char
1365    sub   r0, r7
1366    asr   r0, r0, #1
1367    bx    lr
1368match_3:
1369    sub   r0, r7                @ r0 already points at the matching char
1370    asr   r0, r0, #1
1371    bx    lr
1372
1373/* ------------------------------ */
1374    .balign 4
1375    .global dvmCompiler_TEMPLATE_INTERPRET
1376dvmCompiler_TEMPLATE_INTERPRET:
1377/* File: armv5te/TEMPLATE_INTERPRET.S */
1378    /*
1379     * This handler transfers control to the interpreter without performing
1380     * any lookups.  It may be called either as part of a normal chaining
1381     * operation, or from the transition code in header.S.  We distinguish
1382     * the two cases by looking at the link register.  If called from a
1383     * translation chain, it will point to the chaining Dalvik PC -3.
1384     * On entry:
1385     *    lr - if NULL:
1386     *        r1 - the Dalvik PC to begin interpretation.
1387     *    else
1388     *        [lr, #3] contains Dalvik PC to begin interpretation
1389     *    rGLUE - pointer to interpState
1390     *    rFP - Dalvik frame pointer
1391     */
1392    cmp     lr, #0
1393    ldrne   r1,[lr, #3]                  @ chained call: fetch the Dalvik PC stored after the call site
1394    ldr     r2, .LinterpPunt             @ r2<- address of dvmJitToInterpPunt
1395    mov     r0, r1                       @ set Dalvik PC
1396    bx      r2
1397    @ doesn't return
1398
1399.LinterpPunt:
1400    .word   dvmJitToInterpPunt
1401
1402/* ------------------------------ */
1403    .balign 4
1404    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
1405dvmCompiler_TEMPLATE_MONITOR_ENTER:
1406/* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
1407    /*
1408     * Call out to the runtime to lock an object.  Because this thread
1409     * may have been suspended in THREAD_MONITOR state and the Jit's
1410     * translation cache subsequently cleared, we cannot return directly.
1411     * Instead, unconditionally transition to the interpreter to resume.
1412     *
1413     * On entry:
1414     *    r0 - self pointer
1415     *    r1 - the object (which has already been null-checked by the caller)
1416     *    r4 - the Dalvik PC of the following instruction.
1417     */
1418    ldr     r2, .LdvmLockObject          @ r2<- address of dvmLockObject
1419    mov     r3, #0                       @ Record that we're not returning
1420    str     r3, [r0, #offThread_inJitCodeCache]
1421    blx     r2                           @ dvmLockObject(self, obj)
1422    @ refresh Jit's on/off status
1423    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1424    ldr     r0, [r0]                     @ r0<- current profile table pointer
1425    ldr     r2, .LdvmJitToInterpNoChain
1426    str     r0, [rGLUE, #offGlue_pJitProfTable]  @ cache it in the glue struct
1427    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1428#if defined(WITH_JIT_TUNING)
1429    mov     r0, #kHeavyweightMonitor
1430#endif
1431    bx      r2                           @ continue in dvmJitToInterpNoChain; does not return here
1432
1433/* ------------------------------ */
1434    .balign 4
1435    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
1436dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
1437/* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
1438    /*
1439     * To support deadlock prediction, this version of MONITOR_ENTER
1440     * will always call the heavyweight dvmLockObject, check for an
1441     * exception and then bail out to the interpreter.
1442     *
1443     * On entry:
1444     *    r0 - self pointer
1445     *    r1 - the object (which has already been null-checked by the caller)
1446     *    r4 - the Dalvik PC of the following instruction.
1447     *
1448     */
1449    ldr     r2, .LdvmLockObject          @ r2<- address of dvmLockObject
1450    mov     r3, #0                       @ Record that we're not returning
1451    str     r3, [r0, #offThread_inJitCodeCache]
1452    blx     r2             @ dvmLockObject(self, obj)
1453    @ refresh Jit's on/off status & test for exception
1454    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1455    ldr     r1, [rGLUE, #offGlue_self]
1456    ldr     r0, [r0]                     @ r0<- current profile table pointer
1457    ldr     r1, [r1, #offThread_exception]   @ r1<- pending exception, if any
1458    str     r0, [rGLUE, #offGlue_pJitProfTable]
1459    cmp     r1, #0
1460    beq     1f                           @ no exception: resume in the interpreter
1461    @ .LhandleException is a code label, not a literal-pool word: branch to it directly
1462    sub     r0, r4, #2     @ roll dPC back to this monitor instruction
1463    b       .LhandleException
14641:
1465    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1466#if defined(WITH_JIT_TUNING)
1467    mov     r0, #kHeavyweightMonitor
1468#endif
1469    ldr     pc, .LdvmJitToInterpNoChain
1470
1471    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
1472/* File: armv5te/footer.S */
1473/*
1474 * ===========================================================================
1475 *  Common subroutines and data
1476 * ===========================================================================
1477 */
1478
1479    .text
1480    .align  2
1481.LinvokeNative:
1482    @ Prep for the native call
1483    @ r1 = newFP, r0 = methodToCall
1484    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
1485    mov     r2, #0
1486    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
1487    str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
1488    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
1489    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
1490                                        @ newFp->localRefCookie=top
1491    mov     r9, r3                      @ r9<- glue->self (preserve)
1492    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
1493
1494    mov     r2, r0                      @ r2<- methodToCall
1495    mov     r0, r1                      @ r0<- newFP
1496    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
1497#if defined(WITH_INLINE_PROFILING)
1498    @ r2: methodToCall, r6: rGLUE
1499    stmfd   sp!, {r2,r6}                @ kept on the stack until after the native call
1500    stmfd   sp!, {r0-r3}                @ save the native call arguments
1501    mov     r0, r2
1502    mov     r1, r6
1503    LDR_PC_LR ".LdvmFastMethodTraceEnter"
1504    ldmfd   sp!, {r0-r3}                @ restore the native call arguments
1505#endif
1506
1507    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
1508
1509#if defined(WITH_INLINE_PROFILING)
1510    ldmfd   sp!, {r0-r1}                @ pop the pair pushed above: r0<- method, r1<- glue
1511    LDR_PC_LR ".LdvmFastNativeMethodTraceExit"
1512#endif
1513    @ Refresh Jit's on/off status
1514    ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
1515
1516    @ native return; r9=self, r10=newSaveArea
1517    @ equivalent to dvmPopJniLocals
1518    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
1519    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
1520    ldr     r1, [r9, #offThread_exception] @ check for exception
1521    ldr     r3, [r3]    @ r3 <- pointer to Jit profile table
1522    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
1523    cmp     r1, #0                      @ null?
1524    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
1525    ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
1526    str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
1527
1528    @ r0 = dalvikCallsitePC
1529    bne     .LhandleException           @ exception pending (r1 not null): handle it
1530
1531    str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
1532    cmp     r2, #0                      @ return chaining cell still exists?
1533    bxne    r2                          @ yes - go ahead
1534
1535    @ continue executing the next instruction through the interpreter
1536    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
1537    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
1538#if defined(WITH_JIT_TUNING)
1539    mov     r0, #kCallsiteInterpreted
1540#endif
1541    mov     pc, r1
1542
1543/*
1544 * Common exception exit: leave JIT'ed code and continue in
1545 * dvmMterpCommonExceptionThrown.  On entry: r0  Faulting Dalvik PC
1546 */
1547.LhandleException:
1548#if defined(WITH_SELF_VERIFICATION)
1549    ldr     pc, .LdeadFood @ should not see this under self-verification mode
1550.LdeadFood:
1551    .word   0xdeadf00d
1552#endif
1553    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
1554    mov     r2, #0
1555    str     r2, [r3, #offThread_inJitCodeCache] @ in interpreter land
1556    ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
1557    ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
1558    mov     rPC, r0                 @ reload the faulting Dalvik address
1559    mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
1560
1561    .align  2
1562.LdvmAsmInstructionStart:               @ literal pool: addresses loaded pc-relative by the code above
1563    .word   dvmAsmInstructionStart
1564.LdvmJitToInterpNoChainNoProfile:
1565    .word   dvmJitToInterpNoChainNoProfile
1566.LdvmJitToInterpTraceSelectNoChain:
1567    .word   dvmJitToInterpTraceSelectNoChain
1568.LdvmJitToInterpNoChain:
1569    .word   dvmJitToInterpNoChain
1570.LdvmMterpStdBail:
1571    .word   dvmMterpStdBail
1572.LdvmMterpCommonExceptionThrown:
1573    .word   dvmMterpCommonExceptionThrown
1574.LdvmLockObject:
1575    .word   dvmLockObject
1576#if defined(WITH_JIT_TUNING)
1577.LdvmICHitCount:
1578    .word   gDvmICHitCount
1579#endif
1580#if defined(WITH_SELF_VERIFICATION)
1581.LdvmSelfVerificationMemOpDecode:
1582    .word   dvmSelfVerificationMemOpDecode
1583#endif
1584#if defined(WITH_INLINE_PROFILING)
1585.LdvmFastMethodTraceEnter:
1586    .word   dvmFastMethodTraceEnter
1587.LdvmFastNativeMethodTraceExit:
1588    .word   dvmFastNativeMethodTraceExit
1589.LdvmFastJavaMethodTraceExit:
1590    .word   dvmFastJavaMethodTraceExit
1591#endif
1592.L__aeabi_cdcmple:
1593    .word   __aeabi_cdcmple
1594.L__aeabi_cfcmple:
1595    .word   __aeabi_cfcmple
1596
1597    .global dmvCompilerTemplateEnd      @ NOTE(review): spelled dmv, not dvm; external users presumably match, so do not rename blindly
1598dmvCompilerTemplateEnd:
1599
1600#endif /* WITH_JIT */
1601
1602